From abeae6f01a7ecc34edc251c3f08b6dfd41d3dd02 Mon Sep 17 00:00:00 2001
From: zhang-prog <69562787+zhang-prog@users.noreply.github.com>
Date: Thu, 12 Dec 2024 20:53:52 +0800
Subject: [PATCH] [Feat][Deploy] Add ultrainfer and paddlex-hpi (#2625)

---
 .precommit/check_custom.py | 10 +- libs/paddlex-hpi/MANIFEST.in | 2 + libs/paddlex-hpi/README.md | 0 libs/paddlex-hpi/pyproject.toml | 18 + libs/paddlex-hpi/requirements.txt | 7 + libs/paddlex-hpi/scripts/build_wheel.sh | 3 + libs/paddlex-hpi/scripts/run_tests.sh | 3 + libs/paddlex-hpi/src/paddlex_hpi/__init__.py | 15 + libs/paddlex-hpi/src/paddlex_hpi/_config.py | 218 + .../src/paddlex_hpi/_model_info.py | 59 + .../src/paddlex_hpi/_utils/__init__.py | 13 + .../src/paddlex_hpi/_utils/compat.py | 20 + .../src/paddlex_hpi/_utils/misc.py | 25 + .../src/paddlex_hpi/_utils/typing.py | 24 + .../paddlex_hpi/model_info_collection.json | 4422 +++++++++++++++++ .../src/paddlex_hpi/models/__init__.py | 51 + .../paddlex_hpi/models/anomaly_detection.py | 56 + .../src/paddlex_hpi/models/base.py | 189 + .../paddlex_hpi/models/face_recognition.py | 23 + .../paddlex_hpi/models/formula_recognition.py | 56 + .../paddlex_hpi/models/general_recognition.py | 56 + .../models/image_classification.py | 91 + .../src/paddlex_hpi/models/image_unwarping.py | 56 + .../models/instance_segmentation.py | 105 + .../models/multilabel_classification.py | 80 + .../paddlex_hpi/models/object_detection.py | 98 + .../models/semantic_segmentation.py | 56 + .../paddlex_hpi/models/table_recognition.py | 68 + .../src/paddlex_hpi/models/text_detection.py | 167 + .../paddlex_hpi/models/text_recognition.py | 86 + .../src/paddlex_hpi/models/ts_ad.py | 58 + .../src/paddlex_hpi/models/ts_cls.py | 55 + .../src/paddlex_hpi/models/ts_fc.py | 58 + libs/paddlex-hpi/test_requirements.txt | 2 + libs/paddlex-hpi/tests/__init__.py | 13 + libs/paddlex-hpi/tests/models/__init__.py | 13 + libs/paddlex-hpi/tests/models/base.py | 117 + .../tests/models/test_anomaly_detection.py | 49 + .../tests/models/test_formula_recognition.py | 45 + .../tests/models/test_general_recognition.py | 49 + .../tests/models/test_image_classification.py | 53 + .../tests/models/test_image_unwarping.py | 51 + .../models/test_instance_segmentation.py | 54 + .../models/test_multilabel_classification.py | 53 + .../tests/models/test_object_detection.py | 53 + .../models/test_semantic_segmentation.py | 49 + .../tests/models/test_table_recognition.py | 59 + .../tests/models/test_text_detection.py | 47 + .../tests/models/test_text_recognition.py | 52 + libs/paddlex-hpi/tests/models/test_ts_ad.py | 49 + libs/paddlex-hpi/tests/models/test_ts_cls.py | 50 + libs/paddlex-hpi/tests/models/test_ts_fc.py | 51 + .../tests/testing_utils/__init__.py | 13 + libs/paddlex-hpi/tests/testing_utils/cv.py | 96 + .../tests/testing_utils/download.py | 107 + libs/paddlex-hpi/tests/testing_utils/misc.py | 19 + libs/ultrainfer/.gitignore | 54 + libs/ultrainfer/CMakeLists.txt | 735 +++ libs/ultrainfer/LICENSE | 201 + libs/ultrainfer/ThirdPartyNotices.txt | 1946 ++++++++ libs/ultrainfer/UltraInfer.cmake.in | 335 ++ libs/ultrainfer/UltraInferCSharp.cmake.in | 13 + libs/ultrainfer/VERSION_NUMBER | 1 + libs/ultrainfer/cmake/UltraInferConfig.cmake | 10 + libs/ultrainfer/cmake/ascend.cmake | 32 + libs/ultrainfer/cmake/build_paddle2onnx.cmake | 40 + libs/ultrainfer/cmake/build_tools.cmake | 87 + libs/ultrainfer/cmake/check.cmake | 45 + libs/ultrainfer/cmake/config_cpack.cmake | 38 + libs/ultrainfer/cmake/cuda.cmake | 283 ++ libs/ultrainfer/cmake/cvcuda.cmake | 41 + 
libs/ultrainfer/cmake/faiss.cmake | 122 + libs/ultrainfer/cmake/fast_tokenizer.cmake | 106 + libs/ultrainfer/cmake/flycv.cmake | 97 + libs/ultrainfer/cmake/gflags.cmake | 89 + libs/ultrainfer/cmake/glog.cmake | 68 + libs/ultrainfer/cmake/gtest.cmake | 84 + libs/ultrainfer/cmake/horizon.cmake | 24 + libs/ultrainfer/cmake/kunlunxin.cmake | 26 + libs/ultrainfer/cmake/onnxruntime.cmake | 129 + libs/ultrainfer/cmake/opencv.cmake | 90 + libs/ultrainfer/cmake/openvino.cmake | 112 + libs/ultrainfer/cmake/paddle2onnx.cmake | 90 + libs/ultrainfer/cmake/paddle_inference.cmake | 329 ++ libs/ultrainfer/cmake/paddlelite.cmake | 105 + libs/ultrainfer/cmake/poros.cmake | 95 + libs/ultrainfer/cmake/rknpu2.cmake | 19 + libs/ultrainfer/cmake/sophgo.cmake | 7 + libs/ultrainfer/cmake/summary.cmake | 84 + libs/ultrainfer/cmake/timvx.cmake | 38 + libs/ultrainfer/cmake/toolchain.cmake | 45 + libs/ultrainfer/cmake/tvm.cmake | 55 + libs/ultrainfer/cmake/utils.cmake | 223 + libs/ultrainfer/cpack/debian_postinst.in | 42 + libs/ultrainfer/cpack/debian_prerm.in | 12 + libs/ultrainfer/cpack/rpm_postinst.in | 35 + libs/ultrainfer/cpack/rpm_postrm.in | 8 + libs/ultrainfer/python/__init__.py | 13 + libs/ultrainfer/python/requirements.txt | 15 + libs/ultrainfer/python/scripts/__init__.py | 13 + libs/ultrainfer/python/scripts/build_gpu.sh | 12 + .../python/scripts/process_libraries.py.in | 207 + libs/ultrainfer/python/setup.py | 485 ++ libs/ultrainfer/python/ultrainfer/__init__.py | 186 + .../python/ultrainfer/c_lib_wrap.py.in | 190 + libs/ultrainfer/python/ultrainfer/download.py | 274 + libs/ultrainfer/python/ultrainfer/model.py | 88 + .../python/ultrainfer/pipeline/__init__.py | 16 + .../pipeline/pptinypose/__init__.py | 58 + .../python/ultrainfer/py_only/__init__.py | 16 + .../python/ultrainfer/py_only/base.py | 59 + .../python/ultrainfer/py_only/ts/__init__.py | 16 + .../python/ultrainfer/py_only/ts/model.py | 25 + .../ultrainfer/py_only/ts/processors.py | 582 +++ .../ultrainfer/py_only/vision/__init__.py | 16 + .../python/ultrainfer/py_only/vision/model.py | 26 + .../ultrainfer/py_only/vision/processors.py | 465 ++ libs/ultrainfer/python/ultrainfer/runtime.py | 706 +++ .../python/ultrainfer/text/__init__.py | 18 + .../python/ultrainfer/text/uie/__init__.py | 105 + .../python/ultrainfer/ts/__init__.py | 18 + .../ts/anomalydetection/__init__.py | 16 + .../ts/anomalydetection/ppts/__init__.py | 168 + .../ultrainfer/ts/classification/__init__.py | 16 + .../ts/classification/ppts/__init__.py | 128 + .../ultrainfer/ts/forecasting/__init__.py | 16 + .../ts/forecasting/ppts/__init__.py | 195 + .../python/ultrainfer/utils/__init__.py | 14 + .../ultrainfer/utils/example_resource.py | 26 + .../python/ultrainfer/utils/hub_config.py | 76 + .../python/ultrainfer/utils/hub_env.py | 57 + .../ultrainfer/utils/hub_model_server.py | 134 + .../python/ultrainfer/utils/misc.py | 20 + .../python/ultrainfer/vision/__init__.py | 41 + .../vision/classification/__init__.py | 36 + .../vision/classification/contrib/__init__.py | 15 + .../vision/classification/contrib/resnet.py | 104 + .../classification/contrib/yolov5cls.py | 140 + .../vision/classification/ppcls/__init__.py | 288 ++ .../vision/classification/ppshitu/__init__.py | 145 + .../ultrainfer/vision/common/__init__.py | 18 + .../ultrainfer/vision/common/manager.py | 69 + .../ultrainfer/vision/common/processors.py | 152 + .../ultrainfer/vision/detection/__init__.py | 30 + .../vision/detection/contrib/__init__.py | 15 + .../vision/detection/contrib/fastestdet.py | 157 + 
.../vision/detection/contrib/nanodet_plus.py | 135 + .../detection/contrib/rkyolo/__init__.py | 16 + .../detection/contrib/rkyolo/rkyolov5.py | 315 ++ .../vision/detection/contrib/scaled_yolov4.py | 146 + .../vision/detection/contrib/yolor.py | 145 + .../vision/detection/contrib/yolov5.py | 227 + .../vision/detection/contrib/yolov5lite.py | 191 + .../vision/detection/contrib/yolov5seg.py | 222 + .../vision/detection/contrib/yolov6.py | 145 + .../vision/detection/contrib/yolov7.py | 187 + .../detection/contrib/yolov7end2end_ort.py | 132 + .../detection/contrib/yolov7end2end_trt.py | 132 + .../vision/detection/contrib/yolov8.py | 222 + .../vision/detection/contrib/yolox.py | 130 + .../vision/detection/ppdet/__init__.py | 990 ++++ .../ultrainfer/vision/evaluation/__init__.py | 17 + .../ultrainfer/vision/evaluation/classify.py | 79 + .../ultrainfer/vision/evaluation/detection.py | 125 + .../vision/evaluation/segmentation.py | 105 + .../vision/evaluation/utils/__init__.py | 23 + .../vision/evaluation/utils/cityscapes.py | 78 + .../vision/evaluation/utils/coco.py | 176 + .../vision/evaluation/utils/coco_metrics.py | 90 + .../vision/evaluation/utils/coco_utils.py | 233 + .../vision/evaluation/utils/fd_logging.py | 61 + .../vision/evaluation/utils/json_results.py | 162 + .../vision/evaluation/utils/map_utils.py | 42 + .../vision/evaluation/utils/seg_metrics.py | 144 + .../vision/evaluation/utils/util.py | 34 + .../ultrainfer/vision/facealign/__init__.py | 18 + .../vision/facealign/contrib/__init__.py | 15 + .../facealign/contrib/face_landmark_1000.py | 76 + .../vision/facealign/contrib/pfld.py | 76 + .../vision/facealign/contrib/pipnet.py | 118 + .../ultrainfer/vision/facedet/__init__.py | 22 + .../vision/facedet/contrib/__init__.py | 15 + .../vision/facedet/contrib/blazeface.py | 146 + .../vision/facedet/contrib/centerface.py | 150 + .../vision/facedet/contrib/retinaface.py | 134 + .../vision/facedet/contrib/scrfd.py | 216 + .../vision/facedet/contrib/ultraface.py | 75 + .../vision/facedet/contrib/yolov5face.py | 147 + .../vision/facedet/contrib/yolov7face.py | 193 + .../ultrainfer/vision/faceid/__init__.py | 16 + .../vision/faceid/contrib/__init__.py | 17 + .../vision/faceid/contrib/adaface/__init__.py | 109 + .../faceid/contrib/insightface/__init__.py | 237 + .../ultrainfer/vision/generation/__init__.py | 16 + .../vision/generation/contrib/__init__.py | 15 + .../vision/generation/contrib/anemigan.py | 103 + .../ultrainfer/vision/headpose/__init__.py | 16 + .../vision/headpose/contrib/__init__.py | 15 + .../vision/headpose/contrib/fsanet.py | 76 + .../vision/keypointdetection/__init__.py | 16 + .../keypointdetection/pptinypose/__init__.py | 90 + .../ultrainfer/vision/matting/__init__.py | 18 + .../vision/matting/contrib/__init__.py | 15 + .../vision/matting/contrib/modnet.py | 125 + .../ultrainfer/vision/matting/contrib/rvm.py | 105 + .../vision/matting/ppmatting/__init__.py | 55 + .../python/ultrainfer/vision/ocr/__init__.py | 16 + .../ultrainfer/vision/ocr/ppocr/__init__.py | 1928 +++++++ .../vision/ocr/ppocr/utils/__init__.py | 13 + .../ppocr/utils/ser_vi_layoutxlm/__init__.py | 13 + .../ppocr/utils/ser_vi_layoutxlm/operators.py | 104 + .../utils/ser_vi_layoutxlm/transforms.py | 47 + .../ppocr/utils/ser_vi_layoutxlm/vqa_utils.py | 624 +++ .../ultrainfer/vision/perception/__init__.py | 19 + .../vision/perception/paddle3d/__init__.py | 15 + .../vision/perception/paddle3d/caddn.py | 108 + .../vision/perception/paddle3d/centerpoint.py | 92 + .../vision/perception/paddle3d/petr.py | 106 + 
.../vision/perception/paddle3d/smoke.py | 106 + .../vision/segmentation/__init__.py | 16 + .../vision/segmentation/ppseg/__init__.py | 321 ++ .../python/ultrainfer/vision/sr/__init__.py | 15 + .../ultrainfer/vision/sr/ppsr/__init__.py | 122 + .../ultrainfer/vision/tracking/__init__.py | 21 + .../vision/tracking/pptracking/__init__.py | 69 + .../python/ultrainfer/vision/utils.py | 290 ++ .../ultrainfer/vision/visualize/__init__.py | 229 + libs/ultrainfer/scripts/__init__.py | 13 + libs/ultrainfer/scripts/ascend_init.sh | 13 + libs/ultrainfer/scripts/build_bcloud_lib.py | 41 + libs/ultrainfer/scripts/clean_sdk.sh | 7 + libs/ultrainfer/scripts/copy_directory.py | 32 + libs/ultrainfer/scripts/linux/_build_cpp.sh | 67 + libs/ultrainfer/scripts/linux/_build_py.sh | 78 + .../linux/set_up_docker_and_build_cpp.sh | 72 + .../linux/set_up_docker_and_build_py.sh | 73 + libs/ultrainfer/scripts/patch_lib.sh | 15 + .../scripts/patch_paddle_inference.py | 52 + libs/ultrainfer/scripts/patch_paddle_lite.py | 44 + libs/ultrainfer/scripts/ultrainfer_init.bat | 167 + libs/ultrainfer/scripts/ultrainfer_init.sh | 61 + libs/ultrainfer/ultrainfer/CMakeLists.txt | 0 .../ultrainfer/benchmark/benchmark.h | 86 + libs/ultrainfer/ultrainfer/benchmark/option.h | 49 + .../ultrainfer/ultrainfer/benchmark/results.h | 28 + libs/ultrainfer/ultrainfer/benchmark/utils.cc | 908 ++++ libs/ultrainfer/ultrainfer/benchmark/utils.h | 204 + libs/ultrainfer/ultrainfer/core/allocate.cc | 45 + libs/ultrainfer/ultrainfer/core/allocate.h | 60 + libs/ultrainfer/ultrainfer/core/config.h.in | 86 + libs/ultrainfer/ultrainfer/core/fd_scalar.h | 121 + libs/ultrainfer/ultrainfer/core/fd_tensor.cc | 447 ++ libs/ultrainfer/ultrainfer/core/fd_tensor.h | 216 + libs/ultrainfer/ultrainfer/core/fd_type.cc | 137 + libs/ultrainfer/ultrainfer/core/fd_type.h | 61 + libs/ultrainfer/ultrainfer/core/float16.h | 651 +++ libs/ultrainfer/ultrainfer/function/cast.cc | 47 + libs/ultrainfer/ultrainfer/function/cast.h | 31 + libs/ultrainfer/ultrainfer/function/clip.cc | 59 + libs/ultrainfer/ultrainfer/function/clip.h | 33 + libs/ultrainfer/ultrainfer/function/concat.cc | 118 + libs/ultrainfer/ultrainfer/function/concat.h | 32 + .../ultrainfer/function/cuda_cast.cu | 46 + .../ultrainfer/function/cuda_cast.h | 29 + .../ultrainfer/ultrainfer/function/cumprod.cc | 78 + libs/ultrainfer/ultrainfer/function/cumprod.h | 31 + libs/ultrainfer/ultrainfer/function/eigen.cc | 33 + libs/ultrainfer/ultrainfer/function/eigen.h | 139 + .../ultrainfer/function/elementwise.cc | 110 + .../ultrainfer/function/elementwise.h | 105 + .../ultrainfer/function/elementwise_base.h | 265 + .../ultrainfer/function/elementwise_functor.h | 131 + libs/ultrainfer/ultrainfer/function/full.cc | 42 + libs/ultrainfer/ultrainfer/function/full.h | 44 + .../ultrainfer/function/functions.h | 36 + .../function/gather_scatter_along_axis.cc | 125 + .../function/gather_scatter_along_axis.h | 33 + .../ultrainfer/function/gaussian_random.cc | 46 + .../ultrainfer/function/gaussian_random.h | 36 + .../ultrainfer/function/isfinite.cc | 111 + .../ultrainfer/ultrainfer/function/isfinite.h | 47 + .../ultrainfer/function/linspace.cc | 52 + .../ultrainfer/ultrainfer/function/linspace.h | 33 + libs/ultrainfer/ultrainfer/function/math.cc | 84 + libs/ultrainfer/ultrainfer/function/math.h | 70 + .../ultrainfer/function/math_functor.h | 81 + libs/ultrainfer/ultrainfer/function/pad.cc | 119 + libs/ultrainfer/ultrainfer/function/pad.h | 32 + .../ultrainfer/function/quantile.cc | 130 + .../ultrainfer/ultrainfer/function/quantile.h 
| 34 + libs/ultrainfer/ultrainfer/function/reduce.cc | 414 ++ libs/ultrainfer/ultrainfer/function/reduce.h | 127 + .../ultrainfer/function/reduce_functor.h | 77 + libs/ultrainfer/ultrainfer/function/slice.cc | 182 + libs/ultrainfer/ultrainfer/function/slice.h | 44 + .../ultrainfer/ultrainfer/function/softmax.cc | 125 + libs/ultrainfer/ultrainfer/function/softmax.h | 29 + libs/ultrainfer/ultrainfer/function/sort.cc | 120 + libs/ultrainfer/ultrainfer/function/sort.h | 47 + libs/ultrainfer/ultrainfer/function/split.cc | 160 + libs/ultrainfer/ultrainfer/function/split.h | 36 + libs/ultrainfer/ultrainfer/function/tile.cc | 111 + libs/ultrainfer/ultrainfer/function/tile.h | 36 + .../ultrainfer/function/transpose.cc | 123 + .../ultrainfer/function/transpose.h | 33 + libs/ultrainfer/ultrainfer/pipeline.h | 21 + .../ultrainfer/pipeline/pipeline_pybind.cc | 22 + .../pipeline/pptinypose/pipeline.cc | 70 + .../ultrainfer/pipeline/pptinypose/pipeline.h | 70 + .../pptinypose/pptinyposepipeline_pybind.cc | 36 + .../ultrainfer/pybind/fastdeploy_model.cc | 42 + .../ultrainfer/ultrainfer/pybind/fd_tensor.cc | 293 ++ libs/ultrainfer/ultrainfer/pybind/main.cc.in | 181 + libs/ultrainfer/ultrainfer/pybind/main.h | 135 + libs/ultrainfer/ultrainfer/pybind/runtime.cc | 172 + libs/ultrainfer/ultrainfer/runtime.h | 23 + .../ultrainfer/runtime/backends/backend.h | 158 + .../common/cuda/adaptive_pool2d_kernel.cu | 99 + .../common/cuda/adaptive_pool2d_kernel.h | 35 + .../backends/horizon/horizon_backend.cc | 399 ++ .../backends/horizon/horizon_backend.h | 67 + .../backends/lite/configure_hardware.cc | 171 + .../runtime/backends/lite/lite_backend.cc | 298 ++ .../runtime/backends/lite/lite_backend.h | 76 + .../ultrainfer/runtime/backends/lite/option.h | 103 + .../runtime/backends/lite/option_pybind.cc | 64 + .../runtime/backends/openvino/option.h | 100 + .../backends/openvino/option_pybind.cc | 35 + .../runtime/backends/openvino/ov_backend.cc | 457 ++ .../runtime/backends/openvino/ov_backend.h | 72 + .../backends/ort/ops/adaptive_pool2d.cc | 125 + .../backends/ort/ops/adaptive_pool2d.h | 86 + .../backends/ort/ops/multiclass_nms.cc | 287 ++ .../runtime/backends/ort/ops/multiclass_nms.h | 80 + .../ultrainfer/runtime/backends/ort/option.h | 57 + .../runtime/backends/ort/option_pybind.cc | 37 + .../runtime/backends/ort/ort_backend.cc | 455 ++ .../runtime/backends/ort/ort_backend.h | 91 + .../ultrainfer/runtime/backends/ort/utils.cc | 80 + .../ultrainfer/runtime/backends/ort/utils.h | 39 + .../paddle/ops/centerpoint_postprocess_op.cc | 124 + .../paddle/ops/centerpoint_postprocess_op.cu | 295 ++ .../backends/paddle/ops/grid_sample_3d.cc | 100 + .../backends/paddle/ops/grid_sample_3d.cu | 658 +++ .../backends/paddle/ops/grid_sample_3d.h | 33 + .../runtime/backends/paddle/ops/iou3d_cpu.cc | 272 + .../runtime/backends/paddle/ops/iou3d_cpu.h | 35 + .../runtime/backends/paddle/ops/iou3d_nms.cc | 241 + .../runtime/backends/paddle/ops/iou3d_nms.h | 45 + .../backends/paddle/ops/iou3d_nms_api.cc | 56 + .../backends/paddle/ops/iou3d_nms_kernel.cu | 588 +++ .../backends/paddle/ops/voxelize_op.cc | 208 + .../backends/paddle/ops/voxelize_op.cu | 357 ++ .../runtime/backends/paddle/option.h | 169 + .../runtime/backends/paddle/option_pybind.cc | 72 + .../runtime/backends/paddle/paddle_backend.cc | 650 +++ .../runtime/backends/paddle/paddle_backend.h | 103 + .../runtime/backends/paddle/util.cc | 236 + .../runtime/backends/poros/common/compile.h | 170 + .../runtime/backends/poros/common/iengine.h | 82 + .../backends/poros/common/plugin_create.h 
| 69 + .../backends/poros/common/poros_module.h | 60 + .../runtime/backends/poros/option.h | 46 + .../runtime/backends/poros/option_pybind.cc | 37 + .../runtime/backends/poros/poros_backend.cc | 175 + .../runtime/backends/poros/poros_backend.h | 91 + .../runtime/backends/poros/utils.cc | 185 + .../runtime/backends/rknpu2/option.h | 48 + .../runtime/backends/rknpu2/rknpu2_backend.cc | 593 +++ .../runtime/backends/rknpu2/rknpu2_backend.h | 180 + .../backends/rknpu2/rknpu2_config_pybind.cc | 37 + .../runtime/backends/sophgo/option.h | 25 + .../runtime/backends/sophgo/sophgo_backend.cc | 304 ++ .../runtime/backends/sophgo/sophgo_backend.h | 71 + .../ultrainfer/runtime/backends/tvm/option.h | 21 + .../runtime/backends/tvm/tvm_backend.cc | 204 + .../runtime/backends/tvm/tvm_backend.h | 61 + .../ultrainfer/runtime/enum_variables.cc | 145 + .../ultrainfer/runtime/enum_variables.h | 148 + .../ultrainfer/runtime/option_pybind.cc | 83 + libs/ultrainfer/ultrainfer/runtime/runtime.cc | 431 ++ libs/ultrainfer/ultrainfer/runtime/runtime.h | 126 + .../ultrainfer/runtime/runtime_option.cc | 524 ++ .../ultrainfer/runtime/runtime_option.h | 282 ++ libs/ultrainfer/ultrainfer/text.h | 19 + .../ultrainfer/text/common/option.h | 26 + .../ultrainfer/text/common/result.cc | 18 + .../ultrainfer/text/common/result.h | 23 + .../text/postprocessor/postprocessor.cc | 31 + .../text/postprocessor/postprocessor.h | 34 + .../text/preprocessor/preprocessor.cc | 32 + .../text/preprocessor/preprocessor.h | 34 + libs/ultrainfer/ultrainfer/text/text_model.cc | 79 + libs/ultrainfer/ultrainfer/text/text_model.h | 50 + .../ultrainfer/ultrainfer/text/text_pybind.cc | 63 + libs/ultrainfer/ultrainfer/text/uie/model.cc | 797 +++ libs/ultrainfer/ultrainfer/text/uie/model.h | 210 + .../ultrainfer/text/uie/uie_pybind.cc | 89 + .../ultrainfer/ultrainfer/ultrainfer_model.cc | 517 ++ libs/ultrainfer/ultrainfer/ultrainfer_model.h | 189 + libs/ultrainfer/ultrainfer/utils/axis_utils.h | 52 + libs/ultrainfer/ultrainfer/utils/path.h | 74 + libs/ultrainfer/ultrainfer/utils/perf.h | 49 + libs/ultrainfer/ultrainfer/utils/unique_ptr.h | 55 + libs/ultrainfer/ultrainfer/utils/utils.cc | 68 + libs/ultrainfer/ultrainfer/utils/utils.h | 234 + libs/ultrainfer/ultrainfer/vision.h | 80 + .../classification/classification_pybind.cc | 34 + .../vision/classification/contrib/resnet.cc | 135 + .../vision/classification/contrib/resnet.h | 86 + .../classification/contrib/resnet_pybind.cc | 39 + .../contrib/yolov5cls/postprocessor.cc | 58 + .../contrib/yolov5cls/postprocessor.h | 55 + .../contrib/yolov5cls/preprocessor.cc | 91 + .../contrib/yolov5cls/preprocessor.h | 57 + .../contrib/yolov5cls/yolov5cls.cc | 83 + .../contrib/yolov5cls/yolov5cls.h | 76 + .../contrib/yolov5cls/yolov5cls_pybind.cc | 108 + .../vision/classification/ppcls/model.cc | 123 + .../vision/classification/ppcls/model.h | 128 + .../classification/ppcls/postprocessor.cc | 57 + .../classification/ppcls/postprocessor.h | 56 + .../classification/ppcls/ppcls_pybind.cc | 99 + .../classification/ppcls/preprocessor.cc | 156 + .../classification/ppcls/preprocessor.h | 73 + .../classification/ppshitu/ppshitu_pybind.cc | 101 + .../classification/ppshitu/ppshituv2_det.h | 25 + .../classification/ppshitu/ppshituv2_rec.cc | 121 + .../classification/ppshitu/ppshituv2_rec.h | 117 + .../ppshitu/ppshituv2_rec_postprocessor.cc | 58 + .../ppshitu/ppshituv2_rec_postprocessor.h | 50 + .../ppshitu/ppshituv2_rec_preprocessor.cc | 160 + .../ppshitu/ppshituv2_rec_preprocessor.h | 73 + 
.../common/image_decoder/image_decoder.cc | 112 + .../common/image_decoder/image_decoder.h | 49 + .../common/image_decoder/nvjpeg_decoder.cc | 364 ++ .../common/image_decoder/nvjpeg_decoder.h | 68 + .../vision/common/processors/base.cc | 177 + .../vision/common/processors/base.h | 93 + .../vision/common/processors/base_pybind.cc | 28 + .../vision/common/processors/cast.cc | 113 + .../vision/common/processors/cast.h | 59 + .../vision/common/processors/cast_pybind.cc | 22 + .../vision/common/processors/center_crop.cc | 102 + .../vision/common/processors/center_crop.h | 63 + .../common/processors/center_crop_pybind.cc | 22 + .../common/processors/color_space_convert.cc | 133 + .../common/processors/color_space_convert.h | 99 + .../vision/common/processors/convert.cc | 67 + .../vision/common/processors/convert.h | 52 + .../common/processors/convert_and_permute.cc | 96 + .../common/processors/convert_and_permute.h | 85 + .../vision/common/processors/crop.cc | 68 + .../vision/common/processors/crop.h | 61 + .../vision/common/processors/cvcuda_utils.cc | 127 + .../vision/common/processors/cvcuda_utils.h | 40 + .../vision/common/processors/hwc2chw.cc | 93 + .../vision/common/processors/hwc2chw.h | 54 + .../common/processors/hwc2chw_pybind.cc | 22 + .../common/processors/limit_by_stride.cc | 86 + .../common/processors/limit_by_stride.h | 54 + .../vision/common/processors/limit_short.cc | 93 + .../vision/common/processors/limit_short.h | 62 + .../vision/common/processors/manager.cc | 102 + .../vision/common/processors/manager.h | 104 + .../common/processors/manager_pybind.cc | 57 + .../vision/common/processors/mat.cc | 337 ++ .../ultrainfer/vision/common/processors/mat.h | 176 + .../vision/common/processors/mat_batch.cc | 92 + .../vision/common/processors/mat_batch.h | 83 + .../common/processors/mat_batch_pybind.cc | 30 + .../vision/common/processors/mat_pybind.cc | 29 + .../vision/common/processors/normalize.cc | 107 + .../vision/common/processors/normalize.cu | 117 + .../vision/common/processors/normalize.h | 90 + .../processors/normalize_and_permute.cc | 124 + .../processors/normalize_and_permute.cu | 134 + .../common/processors/normalize_and_permute.h | 107 + .../normalize_and_permute_pybind.cc | 25 + .../common/processors/normalize_pybind.cc | 24 + .../vision/common/processors/pad.cc | 152 + .../ultrainfer/vision/common/processors/pad.h | 89 + .../vision/common/processors/pad_pybind.cc | 23 + .../vision/common/processors/pad_to_size.cc | 272 + .../vision/common/processors/pad_to_size.h | 79 + .../common/processors/pad_to_size_pybind.cc | 23 + .../vision/common/processors/proc_lib.cc | 46 + .../vision/common/processors/proc_lib.h | 34 + .../common/processors/processors_pybind.cc | 48 + .../vision/common/processors/resize.cc | 171 + .../vision/common/processors/resize.h | 93 + .../common/processors/resize_by_short.cc | 188 + .../common/processors/resize_by_short.h | 74 + .../processors/resize_by_short_pybind.cc | 23 + .../vision/common/processors/resize_pybind.cc | 23 + .../vision/common/processors/stride_pad.cc | 186 + .../vision/common/processors/stride_pad.h | 65 + .../common/processors/stride_pad_pybind.cc | 22 + .../vision/common/processors/transform.cc | 169 + .../vision/common/processors/transform.h | 49 + .../vision/common/processors/utils.cc | 279 ++ .../vision/common/processors/utils.h | 55 + .../vision/common/processors/warp_affine.cc | 50 + .../vision/common/processors/warp_affine.h | 61 + .../ultrainfer/vision/common/result.cc | 944 ++++ .../ultrainfer/vision/common/result.h | 494 ++ 
.../contrib/fastestdet/fastestdet.cc | 82 + .../detection/contrib/fastestdet/fastestdet.h | 76 + .../contrib/fastestdet/fastestdet_pybind.cc | 111 + .../contrib/fastestdet/postprocessor.cc | 133 + .../contrib/fastestdet/postprocessor.h | 68 + .../contrib/fastestdet/preprocessor.cc | 84 + .../contrib/fastestdet/preprocessor.h | 57 + .../vision/detection/contrib/nanodet_plus.cc | 338 ++ .../vision/detection/contrib/nanodet_plus.h | 103 + .../detection/contrib/nanodet_plus_pybind.cc | 40 + .../vision/detection/contrib/rknpu2/model.h | 104 + .../detection/contrib/rknpu2/postprocessor.cc | 212 + .../detection/contrib/rknpu2/postprocessor.h | 115 + .../detection/contrib/rknpu2/preprocessor.cc | 109 + .../detection/contrib/rknpu2/preprocessor.h | 99 + .../vision/detection/contrib/rknpu2/rkyolo.cc | 83 + .../vision/detection/contrib/rknpu2/rkyolo.h | 65 + .../detection/contrib/rknpu2/rkyolo_pybind.cc | 163 + .../vision/detection/contrib/rknpu2/utils.cc | 72 + .../vision/detection/contrib/rknpu2/utils.h | 29 + .../vision/detection/contrib/scaledyolov4.cc | 254 + .../vision/detection/contrib/scaledyolov4.h | 101 + .../detection/contrib/scaledyolov4_pybind.cc | 42 + .../vision/detection/contrib/yolor.cc | 252 + .../vision/detection/contrib/yolor.h | 101 + .../vision/detection/contrib/yolor_pybind.cc | 38 + .../detection/contrib/yolov5/postprocessor.cc | 140 + .../detection/contrib/yolov5/postprocessor.h | 74 + .../detection/contrib/yolov5/preprocessor.cc | 119 + .../detection/contrib/yolov5/preprocessor.h | 107 + .../vision/detection/contrib/yolov5/yolov5.cc | 97 + .../vision/detection/contrib/yolov5/yolov5.h | 89 + .../detection/contrib/yolov5/yolov5_pybind.cc | 122 + .../vision/detection/contrib/yolov5lite.cc | 471 ++ .../vision/detection/contrib/yolov5lite.h | 156 + .../detection/contrib/yolov5lite_pybind.cc | 50 + .../contrib/yolov5seg/postprocessor.cc | 217 + .../contrib/yolov5seg/postprocessor.h | 78 + .../contrib/yolov5seg/preprocessor.cc | 119 + .../contrib/yolov5seg/preprocessor.h | 107 + .../detection/contrib/yolov5seg/yolov5seg.cc | 83 + .../detection/contrib/yolov5seg/yolov5seg.h | 76 + .../contrib/yolov5seg/yolov5seg_pybind.cc | 122 + .../vision/detection/contrib/yolov6.cc | 342 ++ .../vision/detection/contrib/yolov6.h | 125 + .../vision/detection/contrib/yolov6_pybind.cc | 42 + .../detection/contrib/yolov7/postprocessor.cc | 117 + .../detection/contrib/yolov7/postprocessor.h | 67 + .../detection/contrib/yolov7/preprocessor.cc | 119 + .../detection/contrib/yolov7/preprocessor.h | 94 + .../vision/detection/contrib/yolov7/yolov7.cc | 94 + .../vision/detection/contrib/yolov7/yolov7.h | 89 + .../detection/contrib/yolov7/yolov7_pybind.cc | 112 + .../detection/contrib/yolov7end2end_ort.cc | 249 + .../detection/contrib/yolov7end2end_ort.h | 92 + .../contrib/yolov7end2end_ort_pybind.cc | 42 + .../detection/contrib/yolov7end2end_trt.cc | 357 ++ .../detection/contrib/yolov7end2end_trt.h | 110 + .../contrib/yolov7end2end_trt_pybind.cc | 46 + .../detection/contrib/yolov8/postprocessor.cc | 143 + .../detection/contrib/yolov8/postprocessor.h | 74 + .../detection/contrib/yolov8/preprocessor.cc | 119 + .../detection/contrib/yolov8/preprocessor.h | 107 + .../vision/detection/contrib/yolov8/yolov8.cc | 82 + .../vision/detection/contrib/yolov8/yolov8.h | 76 + .../detection/contrib/yolov8/yolov8_pybind.cc | 122 + .../vision/detection/contrib/yolox.cc | 322 ++ .../vision/detection/contrib/yolox.h | 106 + .../vision/detection/contrib/yolox_pybind.cc | 38 + .../vision/detection/detection_pybind.cc | 54 + 
.../ultrainfer/vision/detection/ppdet/base.cc | 108 + .../ultrainfer/vision/detection/ppdet/base.h | 100 + .../ultrainfer/vision/detection/ppdet/model.h | 508 ++ .../vision/detection/ppdet/multiclass_nms.cc | 227 + .../vision/detection/ppdet/multiclass_nms.h | 77 + .../detection/ppdet/multiclass_nms_rotated.cc | 468 ++ .../detection/ppdet/multiclass_nms_rotated.h | 77 + .../vision/detection/ppdet/postprocessor.cc | 362 ++ .../vision/detection/ppdet/postprocessor.h | 117 + .../vision/detection/ppdet/ppdet_pybind.cc | 268 + .../vision/detection/ppdet/preprocessor.cc | 228 + .../vision/detection/ppdet/preprocessor.h | 71 + .../facealign/contrib/face_landmark_1000.cc | 134 + .../facealign/contrib/face_landmark_1000.h | 80 + .../contrib/face_landmark_1000_pybind.cc | 34 + .../vision/facealign/contrib/pfld.cc | 135 + .../vision/facealign/contrib/pfld.h | 67 + .../vision/facealign/contrib/pfld_pybind.cc | 31 + .../vision/facealign/contrib/pipnet.cc | 687 +++ .../vision/facealign/contrib/pipnet.h | 133 + .../vision/facealign/contrib/pipnet_pybind.cc | 39 + .../vision/facealign/facealign_pybind.cc | 29 + .../facedet/contrib/centerface/centerface.cc | 88 + .../facedet/contrib/centerface/centerface.h | 81 + .../contrib/centerface/centerface_pybind.cc | 106 + .../contrib/centerface/postprocessor.cc | 151 + .../contrib/centerface/postprocessor.h | 68 + .../contrib/centerface/preprocessor.cc | 81 + .../facedet/contrib/centerface/preprocessor.h | 59 + .../vision/facedet/contrib/retinaface.cc | 293 ++ .../vision/facedet/contrib/retinaface.h | 104 + .../facedet/contrib/retinaface_pybind.cc | 39 + .../vision/facedet/contrib/scrfd.cc | 375 ++ .../ultrainfer/vision/facedet/contrib/scrfd.h | 142 + .../vision/facedet/contrib/scrfd_pybind.cc | 48 + .../vision/facedet/contrib/ultraface.cc | 203 + .../vision/facedet/contrib/ultraface.h | 83 + .../facedet/contrib/ultraface_pybind.cc | 32 + .../vision/facedet/contrib/yolov5face.cc | 280 ++ .../vision/facedet/contrib/yolov5face.h | 102 + .../facedet/contrib/yolov5face_pybind.cc | 42 + .../contrib/yolov7face/postprocessor.cc | 135 + .../contrib/yolov7face/postprocessor.h | 76 + .../contrib/yolov7face/preprocessor.cc | 123 + .../facedet/contrib/yolov7face/preprocessor.h | 98 + .../facedet/contrib/yolov7face/yolov7face.cc | 89 + .../facedet/contrib/yolov7face/yolov7face.h | 81 + .../contrib/yolov7face/yolov7face_pybind.cc | 117 + .../vision/facedet/facedet_pybind.cc | 37 + .../facedet/ppdet/blazeface/blazeface.cc | 94 + .../facedet/ppdet/blazeface/blazeface.h | 84 + .../ppdet/blazeface/blazeface_pybind.cc | 102 + .../facedet/ppdet/blazeface/postprocessor.cc | 96 + .../facedet/ppdet/blazeface/postprocessor.h | 67 + .../facedet/ppdet/blazeface/preprocessor.cc | 209 + .../facedet/ppdet/blazeface/preprocessor.h | 70 + .../vision/faceid/contrib/adaface/adaface.cc | 83 + .../vision/faceid/contrib/adaface/adaface.h | 76 + .../faceid/contrib/adaface/adaface_pybind.cc | 103 + .../faceid/contrib/adaface/postprocessor.cc | 64 + .../faceid/contrib/adaface/postprocessor.h | 51 + .../faceid/contrib/adaface/preprocessor.cc | 76 + .../faceid/contrib/adaface/preprocessor.h | 80 + .../vision/faceid/contrib/insightface/base.cc | 84 + .../vision/faceid/contrib/insightface/base.h | 81 + .../contrib/insightface/insightface_pybind.cc | 138 + .../vision/faceid/contrib/insightface/model.h | 154 + .../contrib/insightface/postprocessor.cc | 67 + .../contrib/insightface/postprocessor.h | 52 + .../contrib/insightface/preprocessor.cc | 79 + .../faceid/contrib/insightface/preprocessor.h | 84 + 
.../ultrainfer/vision/faceid/faceid_pybind.cc | 25 + .../vision/generation/contrib/animegan.cc | 81 + .../vision/generation/contrib/animegan.h | 79 + .../generation/contrib/animegan_pybind.cc | 85 + .../generation/contrib/postprocessor.cc | 50 + .../vision/generation/contrib/postprocessor.h | 42 + .../vision/generation/contrib/preprocessor.cc | 67 + .../vision/generation/contrib/preprocessor.h | 42 + .../vision/generation/generation_pybind.cc | 26 + .../vision/headpose/contrib/fsanet.cc | 132 + .../vision/headpose/contrib/fsanet.h | 68 + .../vision/headpose/contrib/fsanet_pybind.cc | 31 + .../vision/headpose/headpose_pybind.cc | 25 + .../vision/keypointdet/keypointdet_pybind.cc | 26 + .../keypointdet/pptinypose/pptinypose.cc | 283 ++ .../keypointdet/pptinypose/pptinypose.h | 116 + .../pptinypose/pptinypose_pybind.cc | 50 + .../pptinypose/pptinypose_utils.cc | 125 + .../keypointdet/pptinypose/pptinypose_utils.h | 51 + .../vision/matting/contrib/modnet.cc | 155 + .../vision/matting/contrib/modnet.h | 87 + .../vision/matting/contrib/modnet_pybind.cc | 36 + .../ultrainfer/vision/matting/contrib/rvm.cc | 183 + .../ultrainfer/vision/matting/contrib/rvm.h | 101 + .../vision/matting/contrib/rvm_pybind.cc | 38 + .../vision/matting/matting_pybind.cc | 30 + .../vision/matting/ppmatting/ppmatting.cc | 234 + .../vision/matting/ppmatting/ppmatting.h | 75 + .../matting/ppmatting/ppmatting_pybind.cc | 29 + .../ultrainfer/vision/ocr/ocr_pybind.cc | 33 + .../ultrainfer/vision/ocr/ppocr/classifier.cc | 128 + .../ultrainfer/vision/ocr/ppocr/classifier.h | 123 + .../vision/ocr/ppocr/cls_postprocessor.cc | 84 + .../vision/ocr/ppocr/cls_postprocessor.h | 54 + .../vision/ocr/ppocr/cls_preprocessor.cc | 102 + .../vision/ocr/ppocr/cls_preprocessor.h | 86 + .../vision/ocr/ppocr/dbcurvedetector.cc | 124 + .../vision/ocr/ppocr/dbcurvedetector.h | 118 + .../ultrainfer/vision/ocr/ppocr/dbdetector.cc | 122 + .../ultrainfer/vision/ocr/ppocr/dbdetector.h | 115 + .../vision/ocr/ppocr/det_postprocessor.cc | 98 + .../vision/ocr/ppocr/det_postprocessor.h | 84 + .../ocr/ppocr/det_postprocessor_curve.cc | 103 + .../ocr/ppocr/det_postprocessor_curve.h | 89 + .../vision/ocr/ppocr/det_preprocessor.cc | 106 + .../vision/ocr/ppocr/det_preprocessor.h | 103 + .../vision/ocr/ppocr/ocrmodel_pybind.cc | 748 +++ .../vision/ocr/ppocr/ppocr_pybind.cc | 147 + .../ultrainfer/vision/ocr/ppocr/ppocr_v2.cc | 186 + .../ultrainfer/vision/ocr/ppocr/ppocr_v2.h | 112 + .../ultrainfer/vision/ocr/ppocr/ppocr_v3.h | 87 + .../ultrainfer/vision/ocr/ppocr/ppocr_v4.h | 87 + .../vision/ocr/ppocr/ppstructurev2_layout.h | 40 + .../vision/ocr/ppocr/ppstructurev2_table.cc | 233 + .../vision/ocr/ppocr/ppstructurev2_table.h | 101 + .../vision/ocr/ppocr/rec_postprocessor.cc | 150 + .../vision/ocr/ppocr/rec_postprocessor.h | 60 + .../vision/ocr/ppocr/rec_preprocessor.cc | 142 + .../vision/ocr/ppocr/rec_preprocessor.h | 101 + .../ultrainfer/vision/ocr/ppocr/recognizer.cc | 136 + .../ultrainfer/vision/ocr/ppocr/recognizer.h | 122 + .../vision/ocr/ppocr/structurev2_layout.cc | 102 + .../vision/ocr/ppocr/structurev2_layout.h | 101 + .../ppocr/structurev2_layout_postprocessor.cc | 174 + .../ppocr/structurev2_layout_postprocessor.h | 88 + .../ppocr/structurev2_layout_preprocessor.cc | 72 + .../ppocr/structurev2_layout_preprocessor.h | 90 + .../ocr/ppocr/structurev2_ser_vi_layoutxlm.cc | 72 + .../ocr/ppocr/structurev2_ser_vi_layoutxlm.h | 67 + .../vision/ocr/ppocr/structurev2_table.cc | 134 + .../vision/ocr/ppocr/structurev2_table.h | 126 + 
.../ppocr/structurev2_table_postprocessor.cc | 182 + .../ppocr/structurev2_table_postprocessor.h | 73 + .../ppocr/structurev2_table_preprocessor.cc | 106 + .../ppocr/structurev2_table_preprocessor.h | 74 + .../vision/ocr/ppocr/utils/clipper.cc | 4374 ++++++++++++++++ .../vision/ocr/ppocr/utils/clipper.h | 421 ++ .../ocr/ppocr/utils/get_rotate_crop_image.cc | 85 + .../vision/ocr/ppocr/utils/matcher.cc | 89 + .../ocr/ppocr/utils/ocr_postprocess_op.cc | 538 ++ .../ocr/ppocr/utils/ocr_postprocess_op.h | 107 + .../vision/ocr/ppocr/utils/ocr_utils.h | 49 + .../vision/ocr/ppocr/utils/softmax.cc | 51 + .../vision/ocr/ppocr/utils/sorted_boxes.cc | 64 + .../vision/ocr/ppocr/uvdoc_postprocessor.cc | 56 + .../vision/ocr/ppocr/uvdoc_postprocessor.h | 40 + .../vision/ocr/ppocr/uvdoc_preprocessor.cc | 45 + .../vision/ocr/ppocr/uvdoc_preprocessor.h | 60 + .../vision/ocr/ppocr/uvdocwarpper.cc | 101 + .../vision/ocr/ppocr/uvdocwarpper.h | 104 + .../vision/perception/paddle3d/caddn/caddn.cc | 86 + .../vision/perception/paddle3d/caddn/caddn.h | 81 + .../perception/paddle3d/caddn/caddn_pybind.cc | 96 + .../paddle3d/caddn/postprocessor.cc | 70 + .../perception/paddle3d/caddn/postprocessor.h | 48 + .../perception/paddle3d/caddn/preprocessor.cc | 112 + .../perception/paddle3d/caddn/preprocessor.h | 69 + .../paddle3d/centerpoint/centerpoint.cc | 92 + .../paddle3d/centerpoint/centerpoint.h | 81 + .../centerpoint/centerpoint_pybind.cc | 56 + .../paddle3d/centerpoint/postprocessor.cc | 71 + .../paddle3d/centerpoint/postprocessor.h | 47 + .../paddle3d/centerpoint/preprocessor.cc | 105 + .../paddle3d/centerpoint/preprocessor.h | 57 + .../vision/perception/paddle3d/petr/petr.cc | 92 + .../vision/perception/paddle3d/petr/petr.h | 77 + .../perception/paddle3d/petr/petr_pybind.cc | 92 + .../perception/paddle3d/petr/postprocessor.cc | 73 + .../perception/paddle3d/petr/postprocessor.h | 48 + .../perception/paddle3d/petr/preprocessor.cc | 114 + .../perception/paddle3d/petr/preprocessor.h | 88 + .../paddle3d/smoke/postprocessor.cc | 67 + .../perception/paddle3d/smoke/postprocessor.h | 48 + .../perception/paddle3d/smoke/preprocessor.cc | 161 + .../perception/paddle3d/smoke/preprocessor.h | 62 + .../vision/perception/paddle3d/smoke/smoke.cc | 82 + .../vision/perception/paddle3d/smoke/smoke.h | 77 + .../perception/paddle3d/smoke/smoke_pybind.cc | 92 + .../vision/perception/perception_pybind.cc | 32 + .../vision/segmentation/ppseg/model.cc | 103 + .../vision/segmentation/ppseg/model.h | 99 + .../segmentation/ppseg/postprocessor.cc | 291 ++ .../vision/segmentation/ppseg/postprocessor.h | 89 + .../vision/segmentation/ppseg/ppseg_pybind.cc | 130 + .../vision/segmentation/ppseg/preprocessor.cc | 180 + .../vision/segmentation/ppseg/preprocessor.h | 88 + .../segmentation/segmentation_pybind.cc | 26 + .../ultrainfer/vision/sr/ppsr/basicvsr.cc | 38 + .../ultrainfer/vision/sr/ppsr/basicvsr.h | 43 + .../ultrainfer/vision/sr/ppsr/edvr.cc | 73 + .../ultrainfer/vision/sr/ppsr/edvr.h | 46 + .../ultrainfer/vision/sr/ppsr/model.h | 18 + .../ultrainfer/vision/sr/ppsr/ppmsvsr.cc | 130 + .../ultrainfer/vision/sr/ppsr/ppmsvsr.h | 63 + .../ultrainfer/vision/sr/ppsr/ppsr_pybind.cc | 79 + .../ultrainfer/vision/sr/sr_pybind.cc | 25 + .../vision/tracking/pptracking/lapjv.cc | 389 ++ .../vision/tracking/pptracking/lapjv.h | 62 + .../tracking/pptracking/letter_box_resize.cc | 169 + .../tracking/pptracking/letter_box_resize.h | 52 + .../vision/tracking/pptracking/model.cc | 316 ++ .../vision/tracking/pptracking/model.h | 103 + 
.../tracking/pptracking/pptracking_pybind.cc | 37 + .../vision/tracking/pptracking/tracker.cc | 297 ++ .../vision/tracking/pptracking/tracker.h | 73 + .../vision/tracking/pptracking/trajectory.cc | 529 ++ .../vision/tracking/pptracking/trajectory.h | 213 + .../vision/tracking/tracking_pybind.cc | 25 + .../vision/utils/cosine_similarity.cc | 48 + .../ultrainfer/vision/utils/crop_image.cc | 61 + .../ultrainfer/vision/utils/cuda_utils.h | 42 + .../ultrainfer/vision/utils/dark_parse.cc | 81 + .../ultrainfer/vision/utils/face_align.cc | 152 + .../ultrainfer/vision/utils/l2_normalize.cc | 41 + .../ultrainfer/ultrainfer/vision/utils/nms.cc | 142 + .../ultrainfer/vision/utils/sort_det_res.cc | 187 + .../vision/utils/sort_face_det_res.cc | 69 + .../ultrainfer/vision/utils/utils.h | 124 + .../vision/utils/yolo_preprocess.cu | 153 + .../ultrainfer/vision/vision_pybind.cc | 292 ++ .../vision/visualize/classification.cc | 96 + .../ultrainfer/vision/visualize/detection.cc | 374 ++ .../vision/visualize/face_alignment.cc | 37 + .../vision/visualize/face_detection.cc | 137 + .../ultrainfer/vision/visualize/headpose.cc | 62 + .../ultrainfer/vision/visualize/keypoint.cc | 57 + .../ultrainfer/vision/visualize/matting.cc | 152 + .../ultrainfer/vision/visualize/mot.cc | 79 + .../ultrainfer/vision/visualize/ocr.cc | 122 + .../ultrainfer/vision/visualize/perception.cc | 195 + .../visualize/remove_small_connnected_area.cc | 112 + .../vision/visualize/segmentation.cc | 75 + .../vision/visualize/segmentation_arm.cc | 177 + .../vision/visualize/segmentation_arm.h | 27 + .../vision/visualize/swap_background.cc | 180 + .../vision/visualize/swap_background_arm.cc | 238 + .../vision/visualize/swap_background_arm.h | 32 + .../ultrainfer/vision/visualize/visualize.cc | 67 + .../ultrainfer/vision/visualize/visualize.h | 251 + .../vision/visualize/visualize_pybind.cc | 256 + 827 files changed, 106390 insertions(+), 3 deletions(-) create mode 100644 libs/paddlex-hpi/MANIFEST.in create mode 100644 libs/paddlex-hpi/README.md create mode 100644 libs/paddlex-hpi/pyproject.toml create mode 100644 libs/paddlex-hpi/requirements.txt create mode 100644 libs/paddlex-hpi/scripts/build_wheel.sh create mode 100644 libs/paddlex-hpi/scripts/run_tests.sh create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/__init__.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_config.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_model_info.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_utils/__init__.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_utils/compat.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_utils/misc.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/_utils/typing.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/model_info_collection.json create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/__init__.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/anomaly_detection.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/base.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/face_recognition.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/formula_recognition.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/general_recognition.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/image_classification.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/image_unwarping.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/instance_segmentation.py create mode 100644 
libs/paddlex-hpi/src/paddlex_hpi/models/multilabel_classification.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/object_detection.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/semantic_segmentation.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/table_recognition.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/text_detection.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/text_recognition.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/ts_ad.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/ts_cls.py create mode 100644 libs/paddlex-hpi/src/paddlex_hpi/models/ts_fc.py create mode 100644 libs/paddlex-hpi/test_requirements.txt create mode 100644 libs/paddlex-hpi/tests/__init__.py create mode 100644 libs/paddlex-hpi/tests/models/__init__.py create mode 100644 libs/paddlex-hpi/tests/models/base.py create mode 100644 libs/paddlex-hpi/tests/models/test_anomaly_detection.py create mode 100644 libs/paddlex-hpi/tests/models/test_formula_recognition.py create mode 100644 libs/paddlex-hpi/tests/models/test_general_recognition.py create mode 100644 libs/paddlex-hpi/tests/models/test_image_classification.py create mode 100644 libs/paddlex-hpi/tests/models/test_image_unwarping.py create mode 100644 libs/paddlex-hpi/tests/models/test_instance_segmentation.py create mode 100644 libs/paddlex-hpi/tests/models/test_multilabel_classification.py create mode 100644 libs/paddlex-hpi/tests/models/test_object_detection.py create mode 100644 libs/paddlex-hpi/tests/models/test_semantic_segmentation.py create mode 100644 libs/paddlex-hpi/tests/models/test_table_recognition.py create mode 100644 libs/paddlex-hpi/tests/models/test_text_detection.py create mode 100644 libs/paddlex-hpi/tests/models/test_text_recognition.py create mode 100644 libs/paddlex-hpi/tests/models/test_ts_ad.py create mode 100644 libs/paddlex-hpi/tests/models/test_ts_cls.py create mode 100644 libs/paddlex-hpi/tests/models/test_ts_fc.py create mode 100644 libs/paddlex-hpi/tests/testing_utils/__init__.py create mode 100644 libs/paddlex-hpi/tests/testing_utils/cv.py create mode 100644 libs/paddlex-hpi/tests/testing_utils/download.py create mode 100644 libs/paddlex-hpi/tests/testing_utils/misc.py create mode 100644 libs/ultrainfer/.gitignore create mode 100755 libs/ultrainfer/CMakeLists.txt create mode 100755 libs/ultrainfer/LICENSE create mode 100755 libs/ultrainfer/ThirdPartyNotices.txt create mode 100755 libs/ultrainfer/UltraInfer.cmake.in create mode 100755 libs/ultrainfer/UltraInferCSharp.cmake.in create mode 100755 libs/ultrainfer/VERSION_NUMBER create mode 100755 libs/ultrainfer/cmake/UltraInferConfig.cmake create mode 100755 libs/ultrainfer/cmake/ascend.cmake create mode 100755 libs/ultrainfer/cmake/build_paddle2onnx.cmake create mode 100755 libs/ultrainfer/cmake/build_tools.cmake create mode 100755 libs/ultrainfer/cmake/check.cmake create mode 100755 libs/ultrainfer/cmake/config_cpack.cmake create mode 100755 libs/ultrainfer/cmake/cuda.cmake create mode 100755 libs/ultrainfer/cmake/cvcuda.cmake create mode 100755 libs/ultrainfer/cmake/faiss.cmake create mode 100755 libs/ultrainfer/cmake/fast_tokenizer.cmake create mode 100755 libs/ultrainfer/cmake/flycv.cmake create mode 100755 libs/ultrainfer/cmake/gflags.cmake create mode 100755 libs/ultrainfer/cmake/glog.cmake create mode 100755 libs/ultrainfer/cmake/gtest.cmake create mode 100755 libs/ultrainfer/cmake/horizon.cmake create mode 100755 libs/ultrainfer/cmake/kunlunxin.cmake create mode 100755 
libs/ultrainfer/cmake/onnxruntime.cmake create mode 100755 libs/ultrainfer/cmake/opencv.cmake create mode 100755 libs/ultrainfer/cmake/openvino.cmake create mode 100755 libs/ultrainfer/cmake/paddle2onnx.cmake create mode 100755 libs/ultrainfer/cmake/paddle_inference.cmake create mode 100755 libs/ultrainfer/cmake/paddlelite.cmake create mode 100755 libs/ultrainfer/cmake/poros.cmake create mode 100755 libs/ultrainfer/cmake/rknpu2.cmake create mode 100755 libs/ultrainfer/cmake/sophgo.cmake create mode 100755 libs/ultrainfer/cmake/summary.cmake create mode 100755 libs/ultrainfer/cmake/timvx.cmake create mode 100755 libs/ultrainfer/cmake/toolchain.cmake create mode 100755 libs/ultrainfer/cmake/tvm.cmake create mode 100755 libs/ultrainfer/cmake/utils.cmake create mode 100755 libs/ultrainfer/cpack/debian_postinst.in create mode 100755 libs/ultrainfer/cpack/debian_prerm.in create mode 100755 libs/ultrainfer/cpack/rpm_postinst.in create mode 100755 libs/ultrainfer/cpack/rpm_postrm.in create mode 100755 libs/ultrainfer/python/__init__.py create mode 100755 libs/ultrainfer/python/requirements.txt create mode 100755 libs/ultrainfer/python/scripts/__init__.py create mode 100755 libs/ultrainfer/python/scripts/build_gpu.sh create mode 100755 libs/ultrainfer/python/scripts/process_libraries.py.in create mode 100755 libs/ultrainfer/python/setup.py create mode 100755 libs/ultrainfer/python/ultrainfer/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/c_lib_wrap.py.in create mode 100755 libs/ultrainfer/python/ultrainfer/download.py create mode 100755 libs/ultrainfer/python/ultrainfer/model.py create mode 100755 libs/ultrainfer/python/ultrainfer/pipeline/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/pipeline/pptinypose/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/base.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/ts/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/ts/model.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/ts/processors.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/vision/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/vision/model.py create mode 100755 libs/ultrainfer/python/ultrainfer/py_only/vision/processors.py create mode 100755 libs/ultrainfer/python/ultrainfer/runtime.py create mode 100755 libs/ultrainfer/python/ultrainfer/text/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/text/uie/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/anomalydetection/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/anomalydetection/ppts/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/classification/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/classification/ppts/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/forecasting/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/ts/forecasting/ppts/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/utils/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/utils/example_resource.py create mode 100755 libs/ultrainfer/python/ultrainfer/utils/hub_config.py create mode 100755 libs/ultrainfer/python/ultrainfer/utils/hub_env.py create mode 100755 libs/ultrainfer/python/ultrainfer/utils/hub_model_server.py create mode 
100755 libs/ultrainfer/python/ultrainfer/utils/misc.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/contrib/resnet.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/contrib/yolov5cls.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/ppcls/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/classification/ppshitu/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/common/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/common/manager.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/common/processors.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/fastestdet.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/nanodet_plus.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/rkyolov5.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/scaled_yolov4.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolor.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5lite.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5seg.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov6.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_ort.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_trt.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov8.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolox.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/detection/ppdet/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/classify.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/detection.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/segmentation.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/cityscapes.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_metrics.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_utils.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/fd_logging.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/json_results.py create mode 100755 
libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/map_utils.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/seg_metrics.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/util.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facealign/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/face_landmark_1000.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pfld.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pipnet.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/blazeface.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/centerface.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/retinaface.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/scrfd.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/ultraface.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov5face.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov7face.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/faceid/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/adaface/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/insightface/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/generation/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/generation/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/generation/contrib/anemigan.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/headpose/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/fsanet.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/keypointdetection/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/keypointdetection/pptinypose/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/matting/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/matting/contrib/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/matting/contrib/modnet.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/matting/contrib/rvm.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/matting/ppmatting/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/operators.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/transforms.py 
create mode 100755 libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/vqa_utils.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/caddn.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/centerpoint.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/petr.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/smoke.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/segmentation/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/segmentation/ppseg/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/sr/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/sr/ppsr/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/tracking/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/tracking/pptracking/__init__.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/utils.py create mode 100755 libs/ultrainfer/python/ultrainfer/vision/visualize/__init__.py create mode 100755 libs/ultrainfer/scripts/__init__.py create mode 100755 libs/ultrainfer/scripts/ascend_init.sh create mode 100755 libs/ultrainfer/scripts/build_bcloud_lib.py create mode 100755 libs/ultrainfer/scripts/clean_sdk.sh create mode 100755 libs/ultrainfer/scripts/copy_directory.py create mode 100755 libs/ultrainfer/scripts/linux/_build_cpp.sh create mode 100755 libs/ultrainfer/scripts/linux/_build_py.sh create mode 100755 libs/ultrainfer/scripts/linux/set_up_docker_and_build_cpp.sh create mode 100755 libs/ultrainfer/scripts/linux/set_up_docker_and_build_py.sh create mode 100755 libs/ultrainfer/scripts/patch_lib.sh create mode 100755 libs/ultrainfer/scripts/patch_paddle_inference.py create mode 100755 libs/ultrainfer/scripts/patch_paddle_lite.py create mode 100755 libs/ultrainfer/scripts/ultrainfer_init.bat create mode 100755 libs/ultrainfer/scripts/ultrainfer_init.sh create mode 100755 libs/ultrainfer/ultrainfer/CMakeLists.txt create mode 100755 libs/ultrainfer/ultrainfer/benchmark/benchmark.h create mode 100755 libs/ultrainfer/ultrainfer/benchmark/option.h create mode 100755 libs/ultrainfer/ultrainfer/benchmark/results.h create mode 100755 libs/ultrainfer/ultrainfer/benchmark/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/benchmark/utils.h create mode 100755 libs/ultrainfer/ultrainfer/core/allocate.cc create mode 100755 libs/ultrainfer/ultrainfer/core/allocate.h create mode 100755 libs/ultrainfer/ultrainfer/core/config.h.in create mode 100755 libs/ultrainfer/ultrainfer/core/fd_scalar.h create mode 100755 libs/ultrainfer/ultrainfer/core/fd_tensor.cc create mode 100755 libs/ultrainfer/ultrainfer/core/fd_tensor.h create mode 100755 libs/ultrainfer/ultrainfer/core/fd_type.cc create mode 100755 libs/ultrainfer/ultrainfer/core/fd_type.h create mode 100755 libs/ultrainfer/ultrainfer/core/float16.h create mode 100755 libs/ultrainfer/ultrainfer/function/cast.cc create mode 100755 libs/ultrainfer/ultrainfer/function/cast.h create mode 100755 libs/ultrainfer/ultrainfer/function/clip.cc create mode 100755 libs/ultrainfer/ultrainfer/function/clip.h create mode 100755 libs/ultrainfer/ultrainfer/function/concat.cc create mode 100755 libs/ultrainfer/ultrainfer/function/concat.h create mode 100755 
libs/ultrainfer/ultrainfer/function/cuda_cast.cu create mode 100755 libs/ultrainfer/ultrainfer/function/cuda_cast.h create mode 100755 libs/ultrainfer/ultrainfer/function/cumprod.cc create mode 100755 libs/ultrainfer/ultrainfer/function/cumprod.h create mode 100755 libs/ultrainfer/ultrainfer/function/eigen.cc create mode 100755 libs/ultrainfer/ultrainfer/function/eigen.h create mode 100755 libs/ultrainfer/ultrainfer/function/elementwise.cc create mode 100755 libs/ultrainfer/ultrainfer/function/elementwise.h create mode 100755 libs/ultrainfer/ultrainfer/function/elementwise_base.h create mode 100755 libs/ultrainfer/ultrainfer/function/elementwise_functor.h create mode 100755 libs/ultrainfer/ultrainfer/function/full.cc create mode 100755 libs/ultrainfer/ultrainfer/function/full.h create mode 100755 libs/ultrainfer/ultrainfer/function/functions.h create mode 100755 libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.cc create mode 100755 libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.h create mode 100755 libs/ultrainfer/ultrainfer/function/gaussian_random.cc create mode 100755 libs/ultrainfer/ultrainfer/function/gaussian_random.h create mode 100755 libs/ultrainfer/ultrainfer/function/isfinite.cc create mode 100755 libs/ultrainfer/ultrainfer/function/isfinite.h create mode 100755 libs/ultrainfer/ultrainfer/function/linspace.cc create mode 100755 libs/ultrainfer/ultrainfer/function/linspace.h create mode 100755 libs/ultrainfer/ultrainfer/function/math.cc create mode 100755 libs/ultrainfer/ultrainfer/function/math.h create mode 100755 libs/ultrainfer/ultrainfer/function/math_functor.h create mode 100755 libs/ultrainfer/ultrainfer/function/pad.cc create mode 100755 libs/ultrainfer/ultrainfer/function/pad.h create mode 100755 libs/ultrainfer/ultrainfer/function/quantile.cc create mode 100755 libs/ultrainfer/ultrainfer/function/quantile.h create mode 100755 libs/ultrainfer/ultrainfer/function/reduce.cc create mode 100755 libs/ultrainfer/ultrainfer/function/reduce.h create mode 100755 libs/ultrainfer/ultrainfer/function/reduce_functor.h create mode 100755 libs/ultrainfer/ultrainfer/function/slice.cc create mode 100755 libs/ultrainfer/ultrainfer/function/slice.h create mode 100755 libs/ultrainfer/ultrainfer/function/softmax.cc create mode 100755 libs/ultrainfer/ultrainfer/function/softmax.h create mode 100755 libs/ultrainfer/ultrainfer/function/sort.cc create mode 100755 libs/ultrainfer/ultrainfer/function/sort.h create mode 100755 libs/ultrainfer/ultrainfer/function/split.cc create mode 100755 libs/ultrainfer/ultrainfer/function/split.h create mode 100755 libs/ultrainfer/ultrainfer/function/tile.cc create mode 100755 libs/ultrainfer/ultrainfer/function/tile.h create mode 100755 libs/ultrainfer/ultrainfer/function/transpose.cc create mode 100755 libs/ultrainfer/ultrainfer/function/transpose.h create mode 100755 libs/ultrainfer/ultrainfer/pipeline.h create mode 100755 libs/ultrainfer/ultrainfer/pipeline/pipeline_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.cc create mode 100755 libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.h create mode 100755 libs/ultrainfer/ultrainfer/pipeline/pptinypose/pptinyposepipeline_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/pybind/fastdeploy_model.cc create mode 100755 libs/ultrainfer/ultrainfer/pybind/fd_tensor.cc create mode 100755 libs/ultrainfer/ultrainfer/pybind/main.cc.in create mode 100755 libs/ultrainfer/ultrainfer/pybind/main.h create mode 100755 
libs/ultrainfer/ultrainfer/pybind/runtime.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.cu create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/lite/configure_hardware.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/lite/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/lite/option_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/openvino/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/openvino/option_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/option_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cu create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cu create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_api.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_kernel.cu create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cu create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/option_pybind.cc create mode 100755 
libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/paddle/util.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/common/compile.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/common/iengine.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/common/plugin_create.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/common/poros_module.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/option_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/poros/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_config_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/sophgo/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/tvm/option.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/enum_variables.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/enum_variables.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/option_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/runtime.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/runtime.h create mode 100755 libs/ultrainfer/ultrainfer/runtime/runtime_option.cc create mode 100755 libs/ultrainfer/ultrainfer/runtime/runtime_option.h create mode 100755 libs/ultrainfer/ultrainfer/text.h create mode 100755 libs/ultrainfer/ultrainfer/text/common/option.h create mode 100755 libs/ultrainfer/ultrainfer/text/common/result.cc create mode 100755 libs/ultrainfer/ultrainfer/text/common/result.h create mode 100755 libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/text/text_model.cc create mode 100755 libs/ultrainfer/ultrainfer/text/text_model.h create mode 100755 libs/ultrainfer/ultrainfer/text/text_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/text/uie/model.cc create mode 100755 libs/ultrainfer/ultrainfer/text/uie/model.h create mode 100755 libs/ultrainfer/ultrainfer/text/uie/uie_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/ultrainfer_model.cc create mode 100755 libs/ultrainfer/ultrainfer/ultrainfer_model.h create mode 100755 libs/ultrainfer/ultrainfer/utils/axis_utils.h create mode 100755 libs/ultrainfer/ultrainfer/utils/path.h 
create mode 100755 libs/ultrainfer/ultrainfer/utils/perf.h create mode 100755 libs/ultrainfer/ultrainfer/utils/unique_ptr.h create mode 100755 libs/ultrainfer/ultrainfer/utils/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/utils/utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/classification_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/ppcls_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshitu_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_det.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/base.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/base.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/base_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/cast.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/cast.h create mode 
100755 libs/ultrainfer/ultrainfer/vision/common/processors/cast_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/center_crop_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/convert.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/convert.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/crop.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/crop.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/manager.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/manager.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/manager_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/mat_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cu create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cu create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/normalize_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/pad.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/pad.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/pad_pybind.cc create mode 100755 
libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/processors_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/resize_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/transform.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/transform.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.h create mode 100755 libs/ultrainfer/ultrainfer/vision/common/result.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/common/result.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h create mode 100755 
libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort_pybind.cc create mode 100755 
libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/detection_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/ppdet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facealign/facealign_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.cc create mode 100755 
libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/facedet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.h create mode 100755 
libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/insightface_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/faceid/faceid_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.h create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/generation/generation_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.h create mode 100755 libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/headpose/headpose_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/keypointdet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.h create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.h create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.h create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/matting_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.h create mode 100755 libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ocr_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.cc create mode 100755 
libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ocrmodel_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v3.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v4.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_layout.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.h 
create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/get_rotate_crop_image.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/matcher.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/softmax.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/sorted_boxes.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h create mode 100755 
libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.h create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/perception/perception_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/ppseg_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.h create mode 100755 libs/ultrainfer/ultrainfer/vision/segmentation/segmentation_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.h create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.h create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.h create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppsr_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/sr/sr_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.h create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.h create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.h create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/pptracking_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.h create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.h create mode 100755 libs/ultrainfer/ultrainfer/vision/tracking/tracking_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/cosine_similarity.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/crop_image.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/cuda_utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/dark_parse.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/face_align.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/l2_normalize.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/nms.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/sort_det_res.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/sort_face_det_res.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/utils.h create mode 100755 libs/ultrainfer/ultrainfer/vision/utils/yolo_preprocess.cu create mode 100755 
libs/ultrainfer/ultrainfer/vision/vision_pybind.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/classification.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/detection.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/face_alignment.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/face_detection.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/headpose.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/keypoint.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/matting.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/mot.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/ocr.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/perception.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/remove_small_connnected_area.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/segmentation.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.h create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/swap_background.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.h create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/visualize.cc create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/visualize.h create mode 100755 libs/ultrainfer/ultrainfer/vision/visualize/visualize_pybind.cc diff --git a/.precommit/check_custom.py b/.precommit/check_custom.py index db2cc50563..f336d5e98d 100644 --- a/.precommit/check_custom.py +++ b/.precommit/check_custom.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import re import sys @@ -40,9 +41,12 @@ def check(file_path): if not content.startswith(LICENSE_TEXT): print(f"License header missing in {file_path}") return False - if "import paddle" in content or "from paddle import " in content: - print(f"Please use `lazy_paddle` instead `paddle` when import in {file_path}") - return False + if "paddlex" in file_path.split(os.sep): + if "import paddle" in content or "from paddle import " in content: + print( + f"Please use `lazy_paddle` instead `paddle` when import in {file_path}" + ) + return False return True diff --git a/libs/paddlex-hpi/MANIFEST.in b/libs/paddlex-hpi/MANIFEST.in new file mode 100644 index 0000000000..4ee1e6eca8 --- /dev/null +++ b/libs/paddlex-hpi/MANIFEST.in @@ -0,0 +1,2 @@ +include src/paddlex_hpi/py.typed +include src/paddlex_hpi/model_info_collection.json diff --git a/libs/paddlex-hpi/README.md b/libs/paddlex-hpi/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/paddlex-hpi/pyproject.toml b/libs/paddlex-hpi/pyproject.toml new file mode 100644 index 0000000000..3822b24ffe --- /dev/null +++ b/libs/paddlex-hpi/pyproject.toml @@ -0,0 +1,18 @@ +[build-system] +requires = ["setuptools >= 69"] +build-backend = "setuptools.build_meta" + +[project] +name = "paddlex-hpi" +version = "3.0.0.b2" +description = "" +readme = "README.md" +authors = [] +dynamic = ["dependencies", "optional-dependencies"] + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} +optional-dependencies.test = {file = ["test_requirements.txt"]} diff --git a/libs/paddlex-hpi/requirements.txt b/libs/paddlex-hpi/requirements.txt new file mode 100644 index 0000000000..2ac37f06f0 --- /dev/null +++ b/libs/paddlex-hpi/requirements.txt @@ -0,0 +1,7 @@ +# ultrainfer +# paddlex +importlib-resources >= 6.4 +numpy >= 1.21 +pandas >= 1.3.3 +pydantic >= 2 +typing-extensions >= 4.11 diff --git a/libs/paddlex-hpi/scripts/build_wheel.sh b/libs/paddlex-hpi/scripts/build_wheel.sh new file mode 100644 index 0000000000..82f3001917 --- /dev/null +++ b/libs/paddlex-hpi/scripts/build_wheel.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +python -m pip wheel -w wheels/original --no-deps . diff --git a/libs/paddlex-hpi/scripts/run_tests.sh b/libs/paddlex-hpi/scripts/run_tests.sh new file mode 100644 index 0000000000..acfaaa8e06 --- /dev/null +++ b/libs/paddlex-hpi/scripts/run_tests.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +python -m pytest tests diff --git a/libs/paddlex-hpi/src/paddlex_hpi/__init__.py b/libs/paddlex-hpi/src/paddlex_hpi/__init__.py new file mode 100644 index 0000000000..a2aacd841a --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__version__ = "3.0.0.beta2" diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_config.py b/libs/paddlex-hpi/src/paddlex_hpi/_config.py new file mode 100644 index 0000000000..fa129c4911 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_config.py @@ -0,0 +1,218 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings +from pathlib import Path +from typing import Any, Dict, List, Mapping, Optional, Tuple, Type, Union + +import ultrainfer as ui +from paddlex.utils import logging +from pydantic import BaseModel, ConfigDict, Field, field_validator +from typing_extensions import Annotated, TypeAlias, TypedDict, assert_never + +from paddlex_hpi._model_info import get_model_info +from paddlex_hpi._utils.typing import Backend, DeviceType + + +class _BackendConfig(BaseModel): + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + raise NotImplementedError + + +class PaddleInferConfig(_BackendConfig): + cpu_num_threads: int = 8 + enable_mkldnn: bool = True + enable_trt: bool = False + trt_dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None + trt_dynamic_shape_input_data: Optional[Dict[str, List[List[float]]]] = None + enable_log_info: bool = False + + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + option.use_paddle_infer_backend() + option.set_cpu_thread_num(self.cpu_num_threads) + option.paddle_infer_option.enable_mkldnn = self.enable_mkldnn + option.paddle_infer_option.enable_trt = self.enable_trt + option.trt_option.serialize_file = str(model_dir / "trt_serialized.trt") + if self.trt_dynamic_shapes is not None: + for name, shapes in self.trt_dynamic_shapes.items(): + option.trt_option.set_shape(name, *shapes) + if self.trt_dynamic_shape_input_data is not None: + for name, data in self.trt_dynamic_shape_input_data.items(): + option.trt_option.set_input_data(name, *data) + if self.enable_trt: + option.paddle_infer_option.collect_trt_shape = True + option.paddle_infer_option.collect_trt_shape_by_device = True + option.paddle_infer_option.enable_log_info = self.enable_log_info + + +class OpenVINOConfig(_BackendConfig): + cpu_num_threads: int = 8 + + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + option.use_openvino_backend() + option.set_cpu_thread_num(self.cpu_num_threads) + + +class ONNXRuntimeConfig(_BackendConfig): + cpu_num_threads: int = 8 + + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + option.use_ort_backend() + option.set_cpu_thread_num(self.cpu_num_threads) + + +class TensorRTConfig(_BackendConfig): + dynamic_shapes: Optional[Dict[str, List[List[int]]]] = None + + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + option.use_trt_backend() + option.trt_option.serialize_file = str(model_dir / "trt_serialized.trt") + if self.dynamic_shapes is not None: + for name, shapes in self.dynamic_shapes.items(): + option.trt_option.set_shape(name, *shapes) + + 
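Each of the backend configuration classes defined in `_config.py` follows the same pattern: a small pydantic model whose `update_ui_option` method maps its fields onto an `ultrainfer.RuntimeOption`. The lines below are a minimal sketch of how one of these configs is meant to be applied; the model directory path is a hypothetical placeholder, and only calls that appear in the module itself are used.

from pathlib import Path

import ultrainfer as ui
from paddlex_hpi._config import PaddleInferConfig

# Build a config with explicit CPU settings; unset fields keep their defaults.
config = PaddleInferConfig(cpu_num_threads=4, enable_mkldnn=True)

# Translate the config into runtime settings: this selects the Paddle Inference
# backend, sets the CPU thread count, and points the TensorRT serialization
# file into the (placeholder) model directory.
option = ui.RuntimeOption()
config.update_ui_option(option, Path("ResNet50_infer"))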
+class PaddleTensorRTConfig(_BackendConfig): + dynamic_shapes: Dict[str, List[List[int]]] + dynamic_shape_input_data: Optional[Dict[str, List[List[float]]]] = None + enable_log_info: bool = False + + def update_ui_option(self, option: ui.RuntimeOption, model_dir: Path) -> None: + option.use_paddle_infer_backend() + option.paddle_infer_option.enable_trt = True + option.trt_option.serialize_file = str(model_dir / "trt_serialized.trt") + if self.dynamic_shapes is not None: + option.paddle_infer_option.collect_trt_shape = True + # TODO: Support setting collect_trt_shape_by_device + for name, shapes in self.dynamic_shapes.items(): + option.trt_option.set_shape(name, *shapes) + if self.dynamic_shape_input_data is not None: + for name, data in self.dynamic_shape_input_data.items(): + option.trt_option.set_input_data(name, *data) + option.paddle_infer_option.enable_log_info = self.enable_log_info + + +# Should we use tagged unions? +BackendConfig: TypeAlias = Union[ + PaddleInferConfig, + OpenVINOConfig, + ONNXRuntimeConfig, + TensorRTConfig, +] + + +def get_backend_config_type(backend: Backend, /) -> Type[BackendConfig]: + backend_config_type: Type[BackendConfig] + if backend == "paddle_infer": + backend_config_type = PaddleInferConfig + elif backend == "openvino": + backend_config_type = OpenVINOConfig + elif backend == "onnx_runtime": + backend_config_type = ONNXRuntimeConfig + elif backend == "tensorrt": + backend_config_type = TensorRTConfig + else: + assert_never(backend) + return backend_config_type + + +# Can I create this dynamically and automatically? +class BackendConfigs(TypedDict, total=False): + paddle_infer: PaddleInferConfig + openvino: OpenVINOConfig + onnx_runtime: ONNXRuntimeConfig + tensorrt: TensorRTConfig + paddle_tensorrt: PaddleTensorRTConfig + + +class HPIConfig(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + selected_backends: Optional[Dict[DeviceType, Backend]] = None + # For backward compatilibity + backend_configs: Annotated[ + Optional[BackendConfigs], Field(validation_alias="backend_config") + ] = None + + def get_backend_and_config( + self, model_name: str, device_type: DeviceType + ) -> Tuple[Backend, BackendConfig]: + # Do we need an extensible selector? 
+ model_info = get_model_info(model_name, device_type) + if model_info: + backend_config_pairs = model_info["backend_config_pairs"] + else: + backend_config_pairs = [] + config_dict: Dict[str, Any] = {} + if self.selected_backends and device_type in self.selected_backends: + backend = self.selected_backends[device_type] + for pair in backend_config_pairs: + # Use the first one + if pair[0] == self.selected_backends[device_type]: + config_dict.update(pair[1]) + break + else: + if backend_config_pairs: + # Currently we select the first one + backend = backend_config_pairs[0][0] + config_dict.update(backend_config_pairs[0][1]) + else: + backend = "paddle_infer" + if self.backend_configs and backend in self.backend_configs: + config_dict.update( + self.backend_configs[backend].model_dump(exclude_unset=True) + ) + backend_config_type = get_backend_config_type(backend) + backend_config = backend_config_type.model_validate(config_dict) + return backend, backend_config + + # XXX: For backward compatilibity + @field_validator("selected_backends", mode="before") + @classmethod + def _hack_selected_backends(cls, data: Any) -> Any: + if isinstance(data, Mapping): + new_data = dict(data) + for device_type in new_data: + if new_data[device_type] == "paddle_tensorrt": + warnings.warn( + "`paddle_tensorrt` is deprecated. Please use `paddle_infer` instead.", + FutureWarning, + ) + new_data[device_type] = "paddle_infer" + return new_data + + @field_validator("backend_configs", mode="before") + @classmethod + def _hack_backend_configs(cls, data: Any) -> Any: + if isinstance(data, Mapping): + new_data = dict(data) + if new_data and "paddle_tensorrt" in new_data: + warnings.warn( + "`paddle_tensorrt` is deprecated. Please use `paddle_infer` instead.", + FutureWarning, + ) + if "paddle_infer" not in new_data: + new_data["paddle_infer"] = {} + pptrt_cfg = new_data["paddle_tensorrt"] + logging.warning("`paddle_infer.enable_trt` will be set to `True`.") + new_data["paddle_infer"]["enable_trt"] = True + new_data["paddle_infer"]["trt_dynamic_shapes"] = pptrt_cfg[ + "dynamic_shapes" + ] + if "dynamic_shape_input_data" in pptrt_cfg: + new_data["paddle_infer"]["trt_dynamic_shape_input_data"] = ( + pptrt_cfg["dynamic_shape_input_data"] + ) + logging.warning("`paddle_tensorrt.enable_log_info` will be ignored.") + return new_data diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_model_info.py b/libs/paddlex-hpi/src/paddlex_hpi/_model_info.py new file mode 100644 index 0000000000..516e07b7d9 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_model_info.py @@ -0,0 +1,59 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
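The two validators above exist purely for backward compatibility: a deprecated `paddle_tensorrt` selection is rewritten into the equivalent `paddle_infer` settings before normal validation runs. A hedged sketch of that path (the model name and input name are illustrative; assumes paddlex-hpi from this patch is installed):

from paddlex_hpi._config import HPIConfig

legacy = {
    "selected_backends": {"gpu": "paddle_tensorrt"},  # deprecated backend name
    "backend_config": {                               # old field name, accepted via the alias
        "paddle_tensorrt": {"dynamic_shapes": {"x": [[1, 3, 224, 224]] * 3}},
    },
}

# A FutureWarning is emitted for each deprecated key during validation.
hpi_config = HPIConfig.model_validate(legacy)
backend, backend_config = hpi_config.get_backend_and_config(
    model_name="ResNet50", device_type="gpu"
)
# The deprecated selection has been folded into the Paddle Inference config
# with TensorRT turned on.
print(backend)                            # "paddle_infer"
print(backend_config.enable_trt)          # True
print(backend_config.trt_dynamic_shapes)  # {"x": [[1, 3, 224, 224], ...]}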
+ +import json +import platform +from functools import lru_cache +from typing import Optional + +from importlib_resources import files +from paddlex.utils import logging + +from paddlex_hpi._utils.typing import DeviceType + +_DB_PATH: str = "model_info_collection.json" + + +@lru_cache(1) +def _get_model_info_collection() -> dict: + with files("paddlex_hpi").joinpath(_DB_PATH).open("r", encoding="utf-8") as f: + _model_info_collection = json.load(f) + return _model_info_collection + + +def get_model_info(model_name: str, device_type: DeviceType) -> Optional[dict]: + # TODO: Typed model info and nearest referents + model_info_collection = _get_model_info_collection() + uname = platform.uname() + arch = uname.machine.lower() + if arch not in model_info_collection: + return None + logging.debug("Getting model information for arch: %s", arch) + model_info_collection = model_info_collection[arch] + os = uname.system.lower() + if os not in model_info_collection: + return None + logging.debug("Getting model information for OS: %s", os) + model_info_collection = model_info_collection[os] + if device_type == "cpu": + device = "cpu" + elif device_type == "gpu": + device = "gpu_cuda118_cudnn86" + else: + return None + logging.debug("Getting model information for device: %s", device) + model_info_collection = model_info_collection[device] + if model_name not in model_info_collection: + return None + return model_info_collection[model_name] diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_utils/__init__.py b/libs/paddlex-hpi/src/paddlex_hpi/_utils/__init__.py new file mode 100644 index 0000000000..59372f9379 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_utils/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_utils/compat.py b/libs/paddlex-hpi/src/paddlex_hpi/_utils/compat.py new file mode 100644 index 0000000000..58476146f4 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_utils/compat.py @@ -0,0 +1,20 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
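`get_model_info` narrows the bundled database by CPU architecture, operating system, and device before looking up the model name, and returns `None` whenever any level is missing. A small lookup sketch (assumes paddlex-hpi from this patch is installed; on platforms other than x86_64 Linux the call simply returns `None`):

from paddlex_hpi._model_info import get_model_info

info = get_model_info("PP-OCRv4_mobile_det", "cpu")
if info is None:
    print("No prior knowledge about this model on this platform.")
else:
    # An ordered list of (backend, config-overrides) pairs, most preferred first.
    for backend, overrides in info["backend_config_pairs"]:
        print(backend, overrides)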
+ +import os +from typing import Optional + + +def get_compat_version() -> Optional[str]: + return os.getenv("PXD_COMPAT_VERSION") diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_utils/misc.py b/libs/paddlex-hpi/src/paddlex_hpi/_utils/misc.py new file mode 100644 index 0000000000..c9c7fde505 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_utils/misc.py @@ -0,0 +1,25 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from typing import Union + + +def parse_scale(s: Union[float, str]) -> float: + if isinstance(s, float): + return s + if re.fullmatch(r"1(?:\.|\.0)?\s*/\s*255(?:\.|\.0)?", s): + return 1 / 255 + else: + raise ValueError("Invalid scale") diff --git a/libs/paddlex-hpi/src/paddlex_hpi/_utils/typing.py b/libs/paddlex-hpi/src/paddlex_hpi/_utils/typing.py new file mode 100644 index 0000000000..ac27f33775 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/_utils/typing.py @@ -0,0 +1,24 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
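`parse_scale` deliberately accepts only the textual spellings of 1/255 (plus plain floats); everything else is rejected. An illustrative sketch (assumes paddlex-hpi from this patch is installed):

from paddlex_hpi._utils.misc import parse_scale

print(parse_scale(0.5))            # floats pass through unchanged
print(parse_scale("1/255"))        # 0.00392156862745098
print(parse_scale("1.0 / 255.0"))  # the ".0" suffixes and spaces are tolerated

try:
    parse_scale("2/255")           # any other string raises
except ValueError:
    print("unsupported scale string")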
+ +from typing import Any, Dict, List, Literal, TypeVar + +from typing_extensions import TypeAlias + +Data: TypeAlias = Dict[str, Any] +BatchData: TypeAlias = List[Data] +DataT = TypeVar("DataT", Data, BatchData) + +DeviceType: TypeAlias = Literal["cpu", "gpu"] +Backend: TypeAlias = Literal["paddle_infer", "openvino", "onnx_runtime", "tensorrt"] diff --git a/libs/paddlex-hpi/src/paddlex_hpi/model_info_collection.json b/libs/paddlex-hpi/src/paddlex_hpi/model_info_collection.json new file mode 100644 index 0000000000..98623a89cc --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/model_info_collection.json @@ -0,0 +1,4422 @@ +{ + "x86_64": { + "linux": { + "cpu": { + "RT-DETR-H_layout_17cls":{ + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-largesize-L": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-L": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-S": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x1_0_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B6_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B4_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B0_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "CLIP_vit_base_patch16_448_ML": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_face": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileFaceNet": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-S_face": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet_LCNet_x2_5_face": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "BlazeFace-FPN-SSH": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "BlazeFace": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec_CLIP_vit_large": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec_CLIP_vit_base": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_det": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "STFPM": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet18": 
{ + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet34": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet101": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet152": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet18_vd": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet34_vd": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_vd": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet101_vd": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "ResNet152_vd": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "ResNet200_vd": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x0_25": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x0_35": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x0_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x0_75": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x1_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x1_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x2_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNet_x2_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LCNetV2_small": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNetV2_base": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNetV2_large": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "MobileNetV3_large_x0_35": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x0_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] 
+ }, + "MobileNetV3_large_x0_75": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x1_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x1_25": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x0_35": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x0_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x0_75": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x1_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x1_25": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_tiny": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_small": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_base_224": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "openvino", + {} + ] + ] + }, + "ConvNeXt_base_384": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_large_224": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "openvino", + {} + ] + ] + }, + "ConvNeXt_large_384": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x0_25": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x0_5": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x0_75": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x1_0": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x0_25": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x0_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x1_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x1_5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x2_0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_tiny_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + 
"SwinTransformer_small_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_base_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_base_patch4_window12_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_large_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_large_patch4_window12_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNet_small": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNet_tiny": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNet_base": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-HGNetV2-B0": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B1": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B2": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B3": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B4": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B5": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-HGNetV2-B6": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "CLIP_vit_base_patch16_224": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "CLIP_vit_large_patch14_224": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-X": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-L": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-M": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-S": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-N": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-T": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-S": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-M": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-L": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-X": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + 
}, + "YOLOv3-DarkNet53": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOv3-ResNet50_vd_DCN": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOv3-MobileNetV3": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "RT-DETR-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-H": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-X": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-R18": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-R50": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet-S": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PicoDet-L": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Mask-RT-DETR-H": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "Mask-RT-DETR-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "Deeplabv3-R50": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3-R101": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3_Plus-R50": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3_Plus-R101": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LiteSeg-T": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "OCRNet_HRNet-W48": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "OCRNet_HRNet-W18": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SeaFormer_tiny": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SeaFormer_small": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SeaFormer_base": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SeaFormer_large": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B0": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B1": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B2": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B3": { + "backend_config_pairs": [ + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B4": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "SegFormer-B5": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-OCRv4_server_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "openvino", + {} + ] + ] + }, + "PP-OCRv4_mobile_rec": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ch_RepSVTR_rec": { + 
"backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ch_SVTRv2_rec": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-OCRv4_server_det": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-OCRv4_mobile_det": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet_layout_1x": { + "backend_config_pairs": [ + [ + "openvino", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "SLANet": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "SLANet_plus": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + } + }, + "gpu_cuda118_cudnn86": { + "RT-DETR-H_layout_17cls": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "LaTeX_OCR_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-largesize-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus_SOD-S": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x1_0_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B6_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B4_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B0_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "CLIP_vit_base_patch16_448_ML": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_face": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileFaceNet": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-S_face": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet_LCNet_x2_5_face": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + 
}, + "BlazeFace-FPN-SSH": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "BlazeFace": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec_CLIP_vit_large": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec_CLIP_vit_base": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-ShiTuV2_det": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "STFPM": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet18": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet34": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet101": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet152": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet18_vd": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet34_vd": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet50_vd": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet101_vd": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet152_vd": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ResNet200_vd": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + 
"PP-LCNet_x0_25": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x0_35": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x0_5": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x0_75": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x1_0": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x1_5": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x2_5": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNet_x2_0": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNetV2_small": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNetV2_base": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-LCNetV2_large": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x0_35": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x0_5": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x0_75": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x1_0": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_large_x1_25": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x0_35": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + 
"MobileNetV3_small_x0_5": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x0_75": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x1_0": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV3_small_x1_25": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_tiny": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_small": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ConvNeXt_base_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "ConvNeXt_base_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "ConvNeXt_large_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "ConvNeXt_large_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "MobileNetV1_x0_25": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x0_5": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x0_75": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV1_x1_0": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x0_25": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x0_5": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x1_0": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x1_5": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "MobileNetV2_x2_0": 
{ + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_tiny_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_small_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_base_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_base_patch4_window12_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_large_patch4_window7_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SwinTransformer_large_patch4_window12_384": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNet_small": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-HGNet_tiny": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNet_base": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-HGNetV2-B0": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B1": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B2": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B3": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B4": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B5": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-HGNetV2-B6": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "CLIP_vit_base_patch16_224": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "CLIP_vit_large_patch14_224": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + 
"enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-X": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-M": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-YOLOE_plus-S": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-N": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOX-T": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOX-S": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOX-M": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOX-L": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOX-X": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOv3-DarkNet53": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "YOLOv3-ResNet50_vd_DCN": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "YOLOv3-MobileNetV3": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "RT-DETR-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-H": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-X": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-R18": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "RT-DETR-R50": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet-S": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet-L": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + 
] + ] + }, + "Mask-RT-DETR-H": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "Mask-RT-DETR-L": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "Deeplabv3-R50": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3-R101": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3_Plus-R50": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "Deeplabv3_Plus-R101": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-LiteSeg-T": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "OCRNet_HRNet-W48": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "OCRNet_HRNet-W18": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SeaFormer_tiny": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + }, + "SeaFormer_small": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + }, + "SeaFormer_base": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + }, + "SeaFormer_large": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + }, + "SegFormer-B0": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "SegFormer-B1": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "SegFormer-B2": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "SegFormer-B3": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B4": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SegFormer-B5": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PP-OCRv4_server_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + 
"PP-OCRv4_mobile_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ch_RepSVTR_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "paddle_infer", + {} + ] + ] + }, + "ch_SVTRv2_rec": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ] + ] + }, + "PP-OCRv4_server_det": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + {} + ], + [ + "onnx_runtime", + {} + ] + ] + }, + "PP-OCRv4_mobile_det": { + "backend_config_pairs": [ + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "tensorrt", + {} + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "PicoDet_layout_1x": { + "backend_config_pairs": [ + [ + "tensorrt", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ], + [ + "onnx_runtime", + {} + ], + [ + "paddle_infer", + {} + ] + ] + }, + "SLANet": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + }, + "SLANet_plus": { + "backend_config_pairs": [ + [ + "paddle_infer", + {} + ], + [ + "paddle_infer", + { + "enable_trt": true + } + ] + ] + } + } + } + } +} diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/__init__.py b/libs/paddlex-hpi/src/paddlex_hpi/models/__init__.py new file mode 100644 index 0000000000..a60b103a1b --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/__init__.py @@ -0,0 +1,51 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddlex_hpi.models.anomaly_detection import UadPredictor +from paddlex_hpi.models.base import HPPredictor +from paddlex_hpi.models.face_recognition import FaceRecPredictor +from paddlex_hpi.models.formula_recognition import LaTeXOCRPredictor +from paddlex_hpi.models.general_recognition import ShiTuRecPredictor +from paddlex_hpi.models.image_classification import ClasPredictor +from paddlex_hpi.models.image_unwarping import WarpPredictor +from paddlex_hpi.models.instance_segmentation import InstanceSegPredictor +from paddlex_hpi.models.multilabel_classification import MLClasPredictor +from paddlex_hpi.models.object_detection import DetPredictor +from paddlex_hpi.models.semantic_segmentation import SegPredictor +from paddlex_hpi.models.table_recognition import TablePredictor +from paddlex_hpi.models.text_detection import TextDetPredictor +from paddlex_hpi.models.text_recognition import TextRecPredictor +from paddlex_hpi.models.ts_ad import TSAdPredictor +from paddlex_hpi.models.ts_cls import TSClsPredictor +from paddlex_hpi.models.ts_fc import TSFcPredictor + +__all__ = [ + "UadPredictor", + "HPPredictor", + "FaceRecPredictor", + "LaTeXOCRPredictor", + "ShiTuRecPredictor", + "ClasPredictor", + "WarpPredictor", + "InstanceSegPredictor", + "MLClasPredictor", + "DetPredictor", + "SegPredictor", + "TablePredictor", + "TextDetPredictor", + "TextRecPredictor", + "TSAdPredictor", + "TSClsPredictor", + "TSFcPredictor", +] diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/anomaly_detection.py b/libs/paddlex-hpi/src/paddlex_hpi/models/anomaly_detection.py new file mode 100644 index 0000000000..88acf7216b --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/anomaly_detection.py @@ -0,0 +1,56 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
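All predictors are re-exported from `paddlex_hpi.models`, so callers only need this one import path. A minimal construction sketch (the model directory and device are placeholders; the constructor parameters come from the `HPPredictor` base class added later in this patch):

from paddlex_hpi.models import TextDetPredictor

# Point the predictor at an exported PaddleX text-detection model directory.
predictor = TextDetPredictor(
    model_dir="./PP-OCRv4_mobile_det",
    device="gpu:0",
)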
+ +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import SegResult +from paddlex.modules.anomaly_detection.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class UadPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.segmentation.PyOnlyAnomalyDetectionModel: + model = ui.vision.segmentation.PyOnlyAnomalyDetectionModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + uad_result = self._create_uad_result(data, ui_result) + results.append({"result": uad_result}) + return results + + def _create_uad_result(self, data: Data, ui_result: Any) -> SegResult: + pred = np.array(ui_result.label_map, dtype=np.int32).reshape(ui_result.shape) + pred = pred[np.newaxis] + dic = { + "input_path": data["input_path"], + "pred": pred, + } + return SegResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/base.py b/libs/paddlex-hpi/src/paddlex_hpi/models/base.py new file mode 100644 index 0000000000..638e564cbd --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/base.py @@ -0,0 +1,189 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
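`UadPredictor` wraps the py-only anomaly-detection model and repackages each label map as a PaddleX `SegResult` with a leading channel axis. A hedged usage sketch (the model directory and image path are placeholders; assumes the packages from this patch are installed):

from paddlex_hpi.models import UadPredictor

predictor = UadPredictor(model_dir="./STFPM", device="gpu:0")
# `apply` (defined on the base class below) reads images in batches and yields
# one list of {"result": SegResult} items per batch.
for batch in predictor.apply("sample.png"):
    for item in batch:
        seg_result = item["result"]
        # The label map is int32 with shape (1, H, W).
        print(seg_result["pred"].shape, seg_result["pred"].dtype)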
+ +import abc +from os import PathLike +from pathlib import Path +from typing import ( + Any, + Dict, + Final, + Generator, + List, + Optional, + Protocol, + TypedDict, + Union, +) + +import ultrainfer as ui +from ultrainfer.model import BaseUltraInferModel +from paddlex.inference.components import ReadImage, ReadTS +from paddlex.inference.models import BasePredictor +from paddlex.inference.utils.new_ir_blacklist import NEWIR_BLOCKLIST +from paddlex.utils import device as device_helper +from paddlex.utils import logging +from paddlex.utils.subclass_register import AutoRegisterABCMetaClass +from typing_extensions import assert_never + +from paddlex_hpi._config import HPIConfig +from paddlex_hpi._utils.typing import Backend, BatchData + +HPI_CONFIG_KEY: Final[str] = "Hpi" + + +class HPIParams(TypedDict, total=False): + serial_number: Optional[str] + update_license: bool + config: Dict[str, Any] + + +class HPPredictor(BasePredictor, metaclass=AutoRegisterABCMetaClass): + __is_base = True + + def __init__( + self, + model_dir: Union[str, PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__(model_dir=model_dir, config=config) + self._device = device or device_helper.get_default_device() + self._hpi_params = hpi_params or {} + self._hpi_config = self._get_hpi_config() + self._ui_model = self.build_ui_model() + + @property + def model_path(self) -> Path: + return self.model_dir / f"{self.MODEL_FILE_PREFIX}.pdmodel" + + @property + def params_path(self) -> Path: + return self.model_dir / f"{self.MODEL_FILE_PREFIX}.pdiparams" + + def set_predictor(self, **kwargs: Any) -> None: + if "device" in kwargs: + device = kwargs.pop("device") + if device is not None: + if device != self._device: + raise RuntimeError("Currently, changing devices is not supported.") + if kwargs: + raise TypeError(f"Unexpected arguments: {kwargs}") + + def build_ui_model(self) -> BaseUltraInferModel: + option = self._create_ui_option() + return self._build_ui_model(option) + + @abc.abstractmethod + def _build_ui_model(self, option: ui.RuntimeOption) -> BaseUltraInferModel: + raise NotImplementedError + + def _get_hpi_config(self) -> HPIConfig: + if HPI_CONFIG_KEY not in self.config: + logging.debug("Key %r not found in the config", HPI_CONFIG_KEY) + hpi_config = HPIConfig.model_validate( + { + **self.config.get(HPI_CONFIG_KEY, {}), + **self._hpi_params.get("config", {}), + } + ) + return hpi_config + + def _get_selected_backend(self) -> Backend: + device_type, _ = device_helper.parse_device(self._device) + backend = self._hpi_config.get_selected_backend(self.model_name, device_type) + return backend + + def _create_ui_option(self) -> ui.RuntimeOption: + option = ui.RuntimeOption() + # HACK: Disable new IR for models that are known to have issues with the + # new IR. 
+ if self.model_name in NEWIR_BLOCKLIST: + option.paddle_infer_option.enable_new_ir = False + device_type, device_ids = device_helper.parse_device(self._device) + if device_type == "cpu": + pass + elif device_type == "gpu": + if device_ids is None: + device_ids = [0] + if len(device_ids) > 1: + logging.warning( + "Multiple devices are specified (%s), but only the first one will be used.", + self._device, + ) + option.use_gpu(device_ids[0]) + else: + assert_never(device_type) + backend, backend_config = self._hpi_config.get_backend_and_config( + model_name=self.model_name, device_type=device_type + ) + logging.info("Backend: %s", backend) + logging.info("Backend config: %s", backend_config) + backend_config.update_ui_option(option, self.model_dir) + return option + + +class _DataReaderLike(Protocol): + batch_size: int + + def __call__(self, input_list: Any) -> Generator[BatchData, None, None]: ... + + +class HPPredictorWithDataReader(HPPredictor): + def __init__( + self, + model_dir: Union[str, PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + self._batch_size = 1 + self._data_reader = self._build_data_reader() + + def set_predictor(self, **kwargs: Any) -> None: + batch_size = kwargs.pop("batch_size", None) + super().set_predictor(**kwargs) + if batch_size is not None: + self._batch_size = batch_size + self._data_reader.batch_size = batch_size + logging.info("Batch size updated to %d", self._batch_size) + + def apply(self, input: Any) -> Generator[BatchData, None, None]: + for batch_data in self._data_reader(input): + yield self._predict(batch_data) + + @abc.abstractmethod + def _build_data_reader(self) -> _DataReaderLike: + raise NotImplementedError + + @abc.abstractmethod + def _predict(self, batch_data: BatchData) -> BatchData: + raise NotImplementedError + + +class CVPredictor(HPPredictorWithDataReader): + def _build_data_reader(self) -> _DataReaderLike: + return ReadImage(batch_size=self._batch_size, format="BGR") + + +class TSPredictor(HPPredictorWithDataReader): + def _build_data_reader(self) -> _DataReaderLike: + return ReadTS(batch_size=self._batch_size) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/face_recognition.py b/libs/paddlex-hpi/src/paddlex_hpi/models/face_recognition.py new file mode 100644 index 0000000000..12c89c02f0 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/face_recognition.py @@ -0,0 +1,23 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
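[Editor's note: to make the contract established by base.py explicit, here is a schematic CVPredictor subclass. The model name in `entities` and the `ui.vision.SomeModel` class are hypothetical placeholders, not real ultrainfer APIs; the concrete predictors added later in this patch follow this same pattern.]

import ultrainfer as ui
from paddlex_hpi._utils.typing import BatchData
from paddlex_hpi.models.base import CVPredictor


class MyPredictor(CVPredictor):
    # Model names handled by this predictor; used for auto-registration.
    entities = ["MyModel"]  # hypothetical

    def _build_ui_model(self, option: ui.RuntimeOption):
        # Build the underlying ultrainfer model with the runtime option
        # prepared by HPPredictor._create_ui_option().
        return ui.vision.SomeModel(  # hypothetical ultrainfer class
            str(self.model_path),
            str(self.params_path),
            runtime_option=option,
        )

    def _predict(self, batch_data: BatchData) -> BatchData:
        # Each item produced by the ReadImage reader carries the decoded
        # image under "img"; wrap each model output under the "result" key.
        imgs = [data["img"] for data in batch_data]
        ui_results = self._ui_model.batch_predict(imgs)
        return [{"result": r} for r in ui_results]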
+ +from typing import List + +from paddlex.modules.face_recognition.model_list import MODELS + +from paddlex_hpi.models.general_recognition import ShiTuRecPredictor + + +class FaceRecPredictor(ShiTuRecPredictor): + entities = MODELS diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/formula_recognition.py b/libs/paddlex-hpi/src/paddlex_hpi/models/formula_recognition.py new file mode 100644 index 0000000000..4a15f0e7dc --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/formula_recognition.py @@ -0,0 +1,56 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import FormulaRecResult +from paddlex.modules.formula_recognition.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class LaTeXOCRPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.ocr.PyOnlyFormulaRecognitionModel: + model = ui.vision.ocr.PyOnlyFormulaRecognitionModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [ + np.ascontiguousarray(data["img"]).astype("float32") for data in batch_data + ] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + rec_result = self._create_rec_result(data, ui_result) + results.append({"result": rec_result}) + return results + + def _create_rec_result(self, data: Data, ui_result: Any) -> FormulaRecResult: + dic = { + "input_path": data["input_path"], + "rec_text": ui_result.rec_text, + } + return FormulaRecResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/general_recognition.py b/libs/paddlex-hpi/src/paddlex_hpi/models/general_recognition.py new file mode 100644 index 0000000000..86f69df680 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/general_recognition.py @@ -0,0 +1,56 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import BaseResult +from paddlex.modules.general_recognition.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class ShiTuRecPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.classification.PPShiTuV2Recognizer: + model = ui.vision.classification.PPShiTuV2Recognizer( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [ + np.ascontiguousarray(data["img"]).astype("float32") for data in batch_data + ] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + clas_result = self._create_rec_result(data, ui_result) + results.append({"result": clas_result}) + return results + + def _create_rec_result(self, data: Data, ui_result: Any) -> BaseResult: + dic = { + "input_path": data["input_path"], + "feature": np.array(ui_result.feature, dtype="float32"), + } + return BaseResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/image_classification.py b/libs/paddlex-hpi/src/paddlex_hpi/models/image_classification.py new file mode 100644 index 0000000000..20dd8ff056 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/image_classification.py @@ -0,0 +1,91 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from typing import Any, Dict, List, Optional, Union + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import TopkResult +from paddlex.modules.image_classification.model_list import MODELS +from pydantic import BaseModel + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor, HPIParams + + +class _ClasPPParams(BaseModel): + topk: int + label_list: Optional[List[str]] = None + + +class ClasPredictor(CVPredictor): + entities = MODELS + + def __init__( + self, + model_dir: Union[str, os.PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + self._pp_params = self._get_pp_params() + self._ui_model.postprocessor.topk = self._pp_params.topk + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.classification.PaddleClasModel: + model = ui.vision.classification.PaddleClasModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + clas_result = self._create_clas_result(data, ui_result) + results.append({"result": clas_result}) + return results + + def _get_pp_params(self) -> _ClasPPParams: + pp_config = self.config["PostProcess"] + if "Topk" not in pp_config: + raise RuntimeError("`Topk` config not found") + topk_config = pp_config["Topk"] + topk = topk_config["topk"] + label_list = topk_config.get("label_list", None) + return _ClasPPParams(topk=topk, label_list=label_list) + + def _create_clas_result(self, data: Data, ui_result: Any) -> TopkResult: + dic = { + "input_path": data["input_path"], + "class_ids": ui_result.label_ids, + "scores": np.around(ui_result.scores, decimals=5).tolist(), + } + if self._pp_params.label_list is not None: + dic["label_names"] = [ + self._pp_params.label_list[i] for i in ui_result.label_ids + ] + return TopkResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/image_unwarping.py b/libs/paddlex-hpi/src/paddlex_hpi/models/image_unwarping.py new file mode 100644 index 0000000000..6559e1c0e6 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/image_unwarping.py @@ -0,0 +1,56 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
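[Editor's note: for clarity, the `PostProcess` section that `_get_pp_params` in image_classification.py above expects looks roughly as follows; the values are made up, and the snippet simply replays the same lookups the predictor performs.]

# Hypothetical excerpt of an inference config, shaped as _get_pp_params expects.
config = {
    "PostProcess": {
        "Topk": {
            "topk": 5,
            "label_list": ["cat", "dog", "bird"],  # optional
        }
    }
}

pp_config = config["PostProcess"]
if "Topk" not in pp_config:
    raise RuntimeError("`Topk` config not found")
topk = pp_config["Topk"]["topk"]                  # -> 5
label_list = pp_config["Topk"].get("label_list")  # -> ["cat", "dog", "bird"]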
+ +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import DocTrResult +from paddlex.modules.image_unwarping.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class WarpPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model(self, option: ui.RuntimeOption) -> ui.vision.ocr.UVDocWarpper: + model = ui.vision.ocr.UVDocWarpper( + str(self.model_path), + str(self.params_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + warp_result = self._create_warp_result(data, ui_result) + results.append({"result": warp_result}) + return results + + def _create_warp_result(self, data: Data, ui_result: Any) -> DocTrResult: + img = ui_result.numpy() + img = np.moveaxis(img[0], 0, 2) + img *= 255 + img = img[:, :, ::-1] + img = img.astype("uint8") + dic = { + "input_path": data["input_path"], + "doctr_img": img, + } + return DocTrResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/instance_segmentation.py b/libs/paddlex-hpi/src/paddlex_hpi/models/instance_segmentation.py new file mode 100644 index 0000000000..7151eae36a --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/instance_segmentation.py @@ -0,0 +1,105 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
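[Editor's note: the array gymnastics in `_create_warp_result` above can be hard to read in isolation. A self-contained numpy sketch, assuming the model output is a batch of CHW float images in [0, 1] (the real array comes from UVDocWarpper):]

import numpy as np

# Stand-in for ui_result.numpy(): one CHW float image in [0, 1].
raw = np.random.rand(1, 3, 4, 6).astype("float32")

img = np.moveaxis(raw[0], 0, 2)  # CHW -> HWC, shape (4, 6, 3)
img = img * 255                  # rescale to 0-255
img = img[:, :, ::-1]            # flip channel order (assumed RGB -> BGR)
img = img.astype("uint8")        # final dtype stored in DocTrResult
print(img.shape, img.dtype)      # (4, 6, 3) uint8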
+ +import os +from typing import Any, Dict, List, Optional, Union + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import InstanceSegResult +from paddlex.modules.instance_segmentation.model_list import MODELS +from pydantic import BaseModel + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor, HPIParams + + +class _InstanceSegPPParams(BaseModel): + threshold: float + label_list: List[str] + + +class InstanceSegPredictor(CVPredictor): + entities = MODELS + + def __init__( + self, + model_dir: Union[str, os.PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + self._pp_params = self._get_pp_params() + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.detection.PaddleDetectionModel: + model = ui.vision.detection.PaddleDetectionModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + instance_seg_result = self._create_instance_seg_result(data, ui_result) + results.append({"result": instance_seg_result}) + return results + + def _get_pp_params(self) -> _InstanceSegPPParams: + return _InstanceSegPPParams( + threshold=self.config["draw_threshold"], + label_list=self.config["label_list"], + ) + + def _create_instance_seg_result( + self, data: Data, ui_result: Any + ) -> InstanceSegResult: + inds = sorted( + range(len(ui_result.scores)), key=ui_result.scores.__getitem__, reverse=True + ) + inds = [i for i in inds if ui_result.scores[i] > self._pp_params.threshold] + inds = [i for i in inds if ui_result.label_ids[i] > -1] + ids = [ui_result.label_ids[i] for i in inds] + scores = [ui_result.scores[i] for i in inds] + boxes = [ui_result.boxes[i] for i in inds] + masks = [ui_result.masks[i] for i in inds] + masks = [ + np.array(mask.data, dtype=np.uint8).reshape(mask.shape) for mask in masks + ] + dic = { + "input_path": data["input_path"], + "boxes": [ + { + "cls_id": id_, + "label": self._pp_params.label_list[id_], + "score": score, + "coordinate": box, + } + for id_, score, box in zip(ids, scores, boxes) + ], + "masks": masks, + } + return InstanceSegResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/multilabel_classification.py b/libs/paddlex-hpi/src/paddlex_hpi/models/multilabel_classification.py new file mode 100644 index 0000000000..a8d9438ec5 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/multilabel_classification.py @@ -0,0 +1,80 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from typing import Any, Dict, List, Optional, Union + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import MLClassResult +from paddlex.modules.multilabel_classification.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor, HPIParams + + +class MLClasPredictor(CVPredictor): + entities = MODELS + + def __init__( + self, + model_dir: Union[str, os.PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + self._label_list = self._get_label_list() + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.classification.PyOnlyMultilabelClassificationModel: + model = ui.vision.classification.PyOnlyMultilabelClassificationModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + ml_clas_result = self._create_ml_clas_result(data, ui_result) + results.append({"result": ml_clas_result}) + return results + + def _get_label_list(self) -> Optional[List[str]]: + pp_config = self.config["PostProcess"] + if "MultiLabelThreshOutput" not in pp_config: + raise RuntimeError("`MultiLabelThreshOutput` config not found") + label_list = pp_config["MultiLabelThreshOutput"].get("label_list", None) + return label_list + + def _create_ml_clas_result(self, data: Data, ui_result: Any) -> MLClassResult: + dic = { + "input_path": data["input_path"], + "class_ids": ui_result.label_ids, + "scores": np.around(ui_result.scores, decimals=5).tolist(), + } + if self._label_list is not None: + dic["label_names"] = [self._label_list[i] for i in ui_result.label_ids] + return MLClassResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/object_detection.py b/libs/paddlex-hpi/src/paddlex_hpi/models/object_detection.py new file mode 100644 index 0000000000..9867135bec --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/object_detection.py @@ -0,0 +1,98 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from typing import Any, Dict, List, Optional, Union + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import DetResult +from paddlex.modules.object_detection.model_list import MODELS +from pydantic import BaseModel + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor, HPIParams + + +class _DetPPParams(BaseModel): + threshold: float + label_list: List[str] + + +class DetPredictor(CVPredictor): + entities = MODELS + + def __init__( + self, + model_dir: Union[str, os.PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + self._pp_params = self._get_pp_params() + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.detection.PaddleDetectionModel: + model = ui.vision.detection.PaddleDetectionModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + det_result = self._create_det_result(data, ui_result) + results.append({"result": det_result}) + return results + + def _get_pp_params(self) -> _DetPPParams: + return _DetPPParams( + threshold=self.config["draw_threshold"], + label_list=self.config["label_list"], + ) + + def _create_det_result(self, data: Data, ui_result: Any) -> DetResult: + inds = sorted( + range(len(ui_result.scores)), key=ui_result.scores.__getitem__, reverse=True + ) + inds = [i for i in inds if ui_result.scores[i] > self._pp_params.threshold] + inds = [i for i in inds if ui_result.label_ids[i] > -1] + ids = [ui_result.label_ids[i] for i in inds] + scores = [ui_result.scores[i] for i in inds] + boxes = [ui_result.boxes[i] for i in inds] + dic = { + "input_path": data["input_path"], + "boxes": [ + { + "cls_id": id_, + "label": self._pp_params.label_list[id_], + "score": score, + "coordinate": box, + } + for id_, score, box in zip(ids, scores, boxes) + ], + } + return DetResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/semantic_segmentation.py b/libs/paddlex-hpi/src/paddlex_hpi/models/semantic_segmentation.py new file mode 100644 index 0000000000..90d3da3ae5 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/semantic_segmentation.py @@ -0,0 +1,56 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
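[Editor's note: the toy values below are made up; they simply replay the selection logic that `_create_det_result` in object_detection.py above (and the analogous code in instance_segmentation.py) applies to ultrainfer's raw detections.]

# Toy stand-ins for the fields of an ultrainfer detection result.
scores = [0.9, 0.2, 0.75]
label_ids = [3, 1, -1]
boxes = [[10, 10, 50, 50], [0, 0, 5, 5], [20, 20, 60, 60]]
label_list = ["bg", "cat", "dog", "person"]
threshold = 0.5

# Sort by score (descending), keep detections above the draw threshold,
# and drop padded entries whose label id is -1.
inds = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
inds = [i for i in inds if scores[i] > threshold]
inds = [i for i in inds if label_ids[i] > -1]

kept = [
    {
        "cls_id": label_ids[i],
        "label": label_list[label_ids[i]],
        "score": scores[i],
        "coordinate": boxes[i],
    }
    for i in inds
]
print(kept)  # only the score-0.9 "person" box survives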
+ +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import SegResult +from paddlex.modules.semantic_segmentation.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class SegPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.segmentation.PaddleSegModel: + model = ui.vision.segmentation.PaddleSegModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + seg_result = self._create_seg_result(data, ui_result) + results.append({"result": seg_result}) + return results + + def _create_seg_result(self, data: Data, ui_result: Any) -> SegResult: + pred = np.array(ui_result.label_map, dtype=np.int32).reshape(ui_result.shape) + pred = pred[np.newaxis] + dic = { + "input_path": data["input_path"], + "pred": pred, + } + return SegResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/table_recognition.py b/libs/paddlex-hpi/src/paddlex_hpi/models/table_recognition.py new file mode 100644 index 0000000000..8fe33b544a --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/table_recognition.py @@ -0,0 +1,68 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import tempfile +from typing import Any, List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import TableRecResult +from paddlex.modules.table_recognition.model_list import MODELS + +from paddlex_hpi._utils.compat import get_compat_version +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class TablePredictor(CVPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.vision.ocr.StructureV2Table: + compat_version = get_compat_version() + if compat_version == "2.5" or self.model_name == "SLANet": + bbox_shape_type = "ori" + else: + bbox_shape_type = "pad" + with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".txt") as f: + pp_config = self.config["PostProcess"] + for lab in pp_config["character_dict"]: + f.write(lab + "\n") + f.flush() + model = ui.vision.ocr.StructureV2Table( + str(self.model_path), + str(self.params_path), + table_char_dict_path=f.name, + box_shape=bbox_shape_type, + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + table_result = self._create_table_result(data, ui_result) + results.append({"result": table_result}) + return results + + def _create_table_result(self, data: Data, ui_result: Any) -> TableRecResult: + dic = { + "input_path": data["input_path"], + "bbox": ui_result.table_boxes, + "structure": ui_result.table_structure, + } + return TableRecResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/text_detection.py b/libs/paddlex-hpi/src/paddlex_hpi/models/text_detection.py new file mode 100644 index 0000000000..7e9946415c --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/text_detection.py @@ -0,0 +1,167 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
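[Editor's note: the table predictor above, and the text recognizer later in this patch, hand their character dictionaries to ultrainfer through a temporary file. A minimal reproduction of that pattern, with a made-up label list:]

import tempfile

character_dict = ["<thead>", "<tbody>", "<tr>", "<td>"]  # made-up labels

with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".txt") as f:
    # One label per line, flushed so the consumer can read the file by name
    # while it is still open (as the predictor does when building the model).
    for lab in character_dict:
        f.write(lab + "\n")
    f.flush()
    print(f.name)  # path passed to ultrainfer as table_char_dict_path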
+ +import os +from typing import Any, Dict, List, Optional, Union + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import TextDetResult +from paddlex.modules.text_detection.model_list import CURVE_MODELS, MODELS + +from paddlex_hpi._utils.misc import parse_scale +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor, HPIParams + + +class TextDetPredictor(CVPredictor): + entities = MODELS + + def __init__( + self, + model_dir: Union[str, os.PathLike], + config: Optional[Dict[str, Any]] = None, + device: Optional[str] = None, + hpi_params: Optional[HPIParams] = None, + ) -> None: + super().__init__( + model_dir=model_dir, + config=config, + device=device, + hpi_params=hpi_params, + ) + + # HACK + @property + def _is_curve_model(self) -> bool: + return self.model_name in CURVE_MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> Union[ui.vision.ocr.DBDetector, ui.vision.ocr.DBCURVEDetector]: + if self._is_curve_model: + model = ui.vision.ocr.DBCURVEDetector( + str(self.model_path), + str(self.params_path), + runtime_option=option, + ) + else: + model = ui.vision.ocr.DBDetector( + str(self.model_path), + str(self.params_path), + runtime_option=option, + ) + self._config_ui_preprocessor(model) + self._config_ui_postprocessor(model) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_results = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + text_det_result = self._create_text_det_result(data, ui_result) + results.append({"result": text_det_result}) + return results + + def _config_ui_preprocessor(self, model: ui.vision.ocr.DBDetector) -> None: + pp_config = self.config["PreProcess"] + preprocessor = model.preprocessor + for item in pp_config["transform_ops"]: + op_name = next(iter(item)) + op_config = item[op_name] + # XXX: Default values copied from + # `paddlex.inference.models.TextDetPredictor` + if op_name == "DecodeImage": + if op_config["channel_first"]: + raise RuntimeError( + "`DecodeImage.channel_first` must be set to False." 
+ ) + elif op_name == "DetResizeForTest": + preprocessor.max_side_len = op_config.get("resize_long", 960) + elif op_name == "NormalizeImage": + if "scale" in op_config and not ( + abs(parse_scale(op_config["scale"]) - 1 / 255) < 1e-9 + ): + raise RuntimeError("`NormalizeImage.scale` must be set to 1/255.") + if "channel_num" in op_config and op_config["channel_num"] != 3: + raise RuntimeError("`NormalizeImage.channel_num` must be set to 3.") + preprocessor.set_normalize( + op_config.get("mean", [0.485, 0.456, 0.406]), + op_config.get("std", [0.229, 0.224, 0.225]), + True, + ) + elif op_name == "ToCHWImage": + # Do nothing + pass + elif op_name == "DetLabelEncode": + pass + elif op_name == "KeepKeys": + pass + else: + raise RuntimeError(f"Unkown preprocessing operator: {op_name}") + + def _config_ui_postprocessor(self, model: ui.vision.ocr.DBDetector) -> None: + pp_config = self.config["PostProcess"] + # XXX: Default values copied from + # `paddlex.inference.models.TextDetPredictor` + changeable_params: Dict[str, Any] = { + "thresh": 0.3, + "box_thresh": 0.7, + "unclip_ratio": 2.0, + "score_mode": "fast", + "use_dilation": False, + } + unchangeable_params: Dict[str, Any] = { + "max_candidates": 1000, + "box_type": "quad", + } + if self._is_curve_model: + changeable_params["box_type"] = unchangeable_params.pop("box_type") + if "name" in pp_config and pp_config["name"] == "DBPostProcess": + for name in changeable_params: + if name in pp_config: + changeable_params[name] = pp_config[name] + for name, val in unchangeable_params.items(): + if name in pp_config and pp_config[name] != val: + raise RuntimeError( + f"`DBPostProcess.{name}` must be set to {repr(val)}." + ) + else: + raise RuntimeError("Invalid config") + postprocessor = model.postprocessor + postprocessor.det_db_thresh = changeable_params["thresh"] + postprocessor.det_db_box_thresh = changeable_params["box_thresh"] + postprocessor.det_db_unclip_ratio = changeable_params["unclip_ratio"] + postprocessor.use_dilation = changeable_params["use_dilation"] + postprocessor.det_db_score_mode = changeable_params["score_mode"] + if self._is_curve_model: + if changeable_params["box_type"] not in ("quad", "poly"): + raise RuntimeError("Invalid value of `DBPostProcess.box_type`.") + if changeable_params["box_type"] == "quad": + postprocessor.det_db_box_type = "bbox" + else: + postprocessor.det_db_box_type = "poly" + + def _create_text_det_result(self, data: Data, ui_result: Any) -> TextDetResult: + polys = [list(zip(*([iter(box)] * 2))) for box in ui_result.boxes] + # XXX: Currently, we cannot get scores from `ui_result`, so we + # temporarily use dummy scores here. + dummy_scores = [0.0 for _ in ui_result.boxes] + dic = { + "input_path": data["input_path"], + "dt_polys": polys, + "dt_scores": dummy_scores, + } + return TextDetResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/text_recognition.py b/libs/paddlex-hpi/src/paddlex_hpi/models/text_recognition.py new file mode 100644 index 0000000000..636e2fba6b --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/text_recognition.py @@ -0,0 +1,86 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tempfile +from typing import List + +import ultrainfer as ui +import numpy as np +from paddlex.inference.results import TextRecResult +from paddlex.modules.text_recognition.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import CVPredictor + + +class TextRecPredictor(CVPredictor): + entities = MODELS + + def _build_ui_model(self, option: ui.RuntimeOption) -> ui.vision.ocr.Recognizer: + with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".txt") as f: + pp_config = self.config["PostProcess"] + for lab in pp_config["character_dict"]: + f.write(lab + "\n") + f.flush() + model = ui.vision.ocr.Recognizer( + str(self.model_path), + str(self.params_path), + label_path=f.name, + runtime_option=option, + ) + self._config_ui_preprocessor(model) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + imgs = [np.ascontiguousarray(data["img"]) for data in batch_data] + ui_result = self._ui_model.batch_predict(imgs) + results: BatchData = [] + for data, text, score in zip(batch_data, ui_result.text, ui_result.rec_scores): + text_rec_result = self._create_text_rec_result(data, text, score) + results.append({"result": text_rec_result}) + return results + + def _config_ui_preprocessor(self, model: ui.vision.ocr.Recognizer) -> None: + pp_config = self.config["PreProcess"] + preprocessor = model.preprocessor + found_resize_op = False + for item in pp_config["transform_ops"]: + op_name = next(iter(item)) + op_config = item[op_name] + if op_name == "DecodeImage": + if op_config["channel_first"]: + raise RuntimeError( + "`DecodeImage.channel_first` must be set to False." + ) + elif op_name == "RecResizeImg": + preprocessor.rec_image_shape = op_config["image_shape"] + found_resize_op = True + elif op_name == "MultiLabelEncode": + pass + elif op_name == "KeepKeys": + pass + else: + raise RuntimeError(f"Unkown preprocessing operator: {op_name}") + if not found_resize_op: + raise RuntimeError("Could not find the config for `RecResizeImg`.") + + def _create_text_rec_result( + self, data: Data, text: str, score: float + ) -> TextRecResult: + dic = { + "input_path": data["input_path"], + "rec_text": text, + "rec_score": score, + } + return TextRecResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/ts_ad.py b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_ad.py new file mode 100644 index 0000000000..2d3bc03dc1 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_ad.py @@ -0,0 +1,58 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, List + +import ultrainfer as ui +import pandas as pd +from paddlex.inference.results import TSAdResult +from paddlex.modules.ts_anomaly_detection.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import TSPredictor + + +class TSAdPredictor(TSPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.ts.anomalydetection.PyOnlyAnomalyDetectionModel: + model = ui.ts.anomalydetection.PyOnlyAnomalyDetectionModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + ts_data = [data["ts"] for data in batch_data] + ui_results = self._ui_model.batch_predict(ts_data) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + ts_ad_result = self._create_ts_ad_result(data, ui_result) + results.append({"result": ts_ad_result}) + return results + + def _create_ts_ad_result(self, data: Data, ui_result: Any) -> TSAdResult: + data_dict = { + ui_result.col_names[i]: ui_result.data[i] + for i in range(len(ui_result.col_names)) + } + anomaly = pd.DataFrame.from_dict(data_dict) + anomaly.index = ui_result.dates + anomaly.index.name = "timestamp" + dic = {"input_path": data["input_path"], "anomaly": anomaly} + return TSAdResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/ts_cls.py b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_cls.py new file mode 100644 index 0000000000..406ee67f1f --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_cls.py @@ -0,0 +1,55 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Any, List + +import ultrainfer as ui +import pandas as pd +from paddlex.inference.results import TSClsResult +from paddlex.modules.ts_classification.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import TSPredictor + + +class TSClsPredictor(TSPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.ts.classification.PyOnlyClassificationModel: + model = ui.ts.classification.PyOnlyClassificationModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + ts_data = [data["ts"] for data in batch_data] + ui_results = self._ui_model.batch_predict(ts_data) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + ts_cls_result = self._create_ts_cls_result(data, ui_result) + results.append({"result": ts_cls_result}) + return results + + def _create_ts_cls_result(self, data: Data, ui_result: Any) -> TSClsResult: + classification = pd.DataFrame.from_dict( + {"classid": [ui_result.class_id], "score": [ui_result.score]} + ) + classification.index.name = "sample" + dic = {"input_path": data["input_path"], "classification": classification} + return TSClsResult(dic) diff --git a/libs/paddlex-hpi/src/paddlex_hpi/models/ts_fc.py b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_fc.py new file mode 100644 index 0000000000..c1168539f8 --- /dev/null +++ b/libs/paddlex-hpi/src/paddlex_hpi/models/ts_fc.py @@ -0,0 +1,58 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Any, List + +import ultrainfer as ui +import pandas as pd +from paddlex.inference.results import TSFcResult +from paddlex.modules.ts_forecast.model_list import MODELS + +from paddlex_hpi._utils.typing import BatchData, Data +from paddlex_hpi.models.base import TSPredictor + + +class TSFcPredictor(TSPredictor): + entities = MODELS + + def _build_ui_model( + self, option: ui.RuntimeOption + ) -> ui.ts.forecasting.PyOnlyForecastingModel: + model = ui.ts.forecasting.PyOnlyForecastingModel( + str(self.model_path), + str(self.params_path), + str(self.config_path), + runtime_option=option, + ) + return model + + def _predict(self, batch_data: BatchData) -> BatchData: + ts_data = [data["ts"] for data in batch_data] + ui_results = self._ui_model.batch_predict(ts_data) + results: BatchData = [] + for data, ui_result in zip(batch_data, ui_results): + ts_fc_result = self._create_ts_fc_result(data, ui_result) + results.append({"result": ts_fc_result}) + return results + + def _create_ts_fc_result(self, data: Data, ui_result: Any) -> TSFcResult: + data_dict = { + ui_result.col_names[i]: ui_result.data[i] + for i in range(len(ui_result.col_names)) + } + forecast = pd.DataFrame.from_dict(data_dict) + forecast.index = ui_result.dates + forecast.index.name = "date" + dic = {"input_path": data["input_path"], "forecast": forecast} + return TSFcResult(dic) diff --git a/libs/paddlex-hpi/test_requirements.txt b/libs/paddlex-hpi/test_requirements.txt new file mode 100644 index 0000000000..314e76a4f8 --- /dev/null +++ b/libs/paddlex-hpi/test_requirements.txt @@ -0,0 +1,2 @@ +pytest >= 8 +shapely >= 2 diff --git a/libs/paddlex-hpi/tests/__init__.py b/libs/paddlex-hpi/tests/__init__.py new file mode 100644 index 0000000000..59372f9379 --- /dev/null +++ b/libs/paddlex-hpi/tests/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/paddlex-hpi/tests/models/__init__.py b/libs/paddlex-hpi/tests/models/__init__.py new file mode 100644 index 0000000000..59372f9379 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
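[Editor's note: the three time-series predictors above all package their outputs as pandas DataFrames. A minimal sketch with made-up values, mirroring how `_create_ts_fc_result` assembles the forecast frame from column names, data, and dates:]

import pandas as pd

# Toy stand-ins for the fields of an ultrainfer forecasting result.
col_names = ["OT"]
data = [[4.2, 4.5, 4.1]]
dates = ["2021-01-01", "2021-01-02", "2021-01-03"]

forecast = pd.DataFrame.from_dict(
    {col_names[i]: data[i] for i in range(len(col_names))}
)
forecast.index = dates
forecast.index.name = "date"
print(forecast)  # a DataFrame indexed by "date" with one "OT" column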
diff --git a/libs/paddlex-hpi/tests/models/base.py b/libs/paddlex-hpi/tests/models/base.py new file mode 100644 index 0000000000..511791d6a0 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/base.py @@ -0,0 +1,117 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import shutil +import tempfile +from pathlib import Path +from types import GeneratorType + +import pytest +from tests.testing_utils.download import download, download_and_extract +from tests.testing_utils.misc import get_filename + +NUM_INPUT_FILES = 10 +DEVICES = ["cpu", "gpu:0"] +BATCH_SIZES = [1, 2, 4] + + +class BaseTestPredictor(object): + @property + def model_dir(self): + raise NotImplementedError + + @property + def model_url(self): + raise NotImplementedError + + @property + def input_data_url(self): + raise NotImplementedError + + @property + def expected_result_url(self): + raise NotImplementedError + + @property + def predictor_cls(self): + raise NotImplementedError + + @pytest.fixture(scope="class") + def data_dir(self): + with tempfile.TemporaryDirectory() as td: + yield Path(td) + + @pytest.fixture(scope="class") + def model_path(self, data_dir): + download_and_extract(self.model_url, data_dir, "model") + yield data_dir / "model" + + @pytest.fixture(scope="class") + def input_data_path(self, data_dir): + input_data_path = (data_dir / get_filename(self.input_data_url)).with_stem( + "test" + ) + download(self.input_data_url, input_data_path) + yield input_data_path + + @pytest.fixture(scope="class") + def input_data_dir(self, data_dir, input_data_path): + input_data_dir = data_dir / "input_data" + input_data_dir.mkdir() + for i in range(NUM_INPUT_FILES): + shutil.copy( + input_data_path, + (input_data_dir / f"test_{i}").with_suffix(input_data_path.suffix), + ) + yield input_data_dir + + @pytest.fixture(scope="class") + def expected_result(self, data_dir): + expected_result_path = data_dir / "expected.json" + download(self.expected_result_url, expected_result_path) + with open(expected_result_path, "r", encoding="utf-8") as f: + expected_result = json.load(f) + yield expected_result + + @pytest.mark.parametrize("device", DEVICES) + def test___call__single_input_data( + self, model_path, input_data_path, device, expected_result + ): + predictor = self.predictor_cls(model_path, device=device) + output = predictor(str(input_data_path)) + self._check_output(output, expected_result, 1) + output = predictor([str(input_data_path), str(input_data_path)]) + self._check_output(output, expected_result, 2) + + @pytest.mark.parametrize("device", DEVICES) + @pytest.mark.parametrize("batch_size", BATCH_SIZES) + def test___call__input_data_dir( + self, model_path, input_data_dir, device, batch_size, expected_result + ): + predictor = self.predictor_cls(model_path, device=device) + predictor.set_predictor(batch_size=batch_size) + output = predictor(str(input_data_dir)) + self._check_output(output, expected_result, NUM_INPUT_FILES) + + def _check_output(self, 
output, expected_result, expected_num_results): + assert isinstance(output, GeneratorType) + # Note that this exhausts the generator + output = list(output) + assert len(output) == expected_num_results + for result in output: + self._check_result(result, expected_result) + + def _check_result(self, result, expected_result): + raise NotImplementedError diff --git a/libs/paddlex-hpi/tests/models/test_anomaly_detection.py b/libs/paddlex-hpi/tests/models/test_anomaly_detection.py new file mode 100644 index 0000000000..87dd2b2527 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_anomaly_detection.py @@ -0,0 +1,49 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from paddlex.inference.results import SegResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import UadPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/uad_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/uad_input.png" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/uad_result.json" + + +class TestUadPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return UadPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, SegResult) + assert set(result) == set(expected_result) + pred = result["pred"] + expected_pred = np.array(expected_result["pred"], dtype=np.int32) + assert pred.shape == expected_pred.shape + assert (pred != expected_pred).sum() / pred.size < 0.01 diff --git a/libs/paddlex-hpi/tests/models/test_formula_recognition.py b/libs/paddlex-hpi/tests/models/test_formula_recognition.py new file mode 100644 index 0000000000..d4cca67b21 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_formula_recognition.py @@ -0,0 +1,45 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
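[Editor's note: a word on the comparison style used throughout these tests: exact equality is avoided in favour of small tolerances, e.g. a bounded pixel-mismatch ratio for segmentation maps and np.allclose for scores. A self-contained illustration with dummy arrays:]

import numpy as np

# Segmentation-style check: tolerate up to 1% mismatching pixels.
pred = np.zeros((1, 20, 20), dtype=np.int32)
expected_pred = pred.copy()
expected_pred[0, 0, 0] = 1  # one differing pixel out of 400
assert (pred != expected_pred).sum() / pred.size < 0.01

# Score-style check: element-wise closeness with relative/absolute tolerances.
scores = np.array([0.9132, 0.0413])
expected_scores = np.array([0.913, 0.041])
assert np.allclose(scores, expected_scores, rtol=1e-2, atol=1e-3)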
+ +from paddlex.inference.results import FormulaRecResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import LaTeXOCRPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/latex_ocr_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/latex_ocr_input.png" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/latex_ocr_result.json" + + +class TestLaTeXOCRPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return LaTeXOCRPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, FormulaRecResult) + assert set(result) == set(expected_result) + assert result["rec_text"] == expected_result["rec_text"] diff --git a/libs/paddlex-hpi/tests/models/test_general_recognition.py b/libs/paddlex-hpi/tests/models/test_general_recognition.py new file mode 100644 index 0000000000..4b7facbc2a --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_general_recognition.py @@ -0,0 +1,49 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddlex.inference.results import BaseResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import ShiTuRecPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/shitu_rec_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/shitu_rec_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/shitu_rec_result.json" + + +class TestShiTuRecPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return ShiTuRecPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, BaseResult) + assert set(result) == set(expected_result) + expected_result = expected_result["rec_feature"] + result = result["rec_feature"].tolist() + assert sum([abs(x - y) for x, y in zip(result, expected_result)]) < 0.001 * len( + result + ) diff --git a/libs/paddlex-hpi/tests/models/test_image_classification.py b/libs/paddlex-hpi/tests/models/test_image_classification.py new file mode 100644 index 0000000000..6189d15f5f --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_image_classification.py @@ -0,0 +1,53 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from paddlex.inference.results import TopkResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import ClasPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/clas_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/clas_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/clas_result.json" + + +class TestClasPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return ClasPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TopkResult) + assert set(result) == set(expected_result) + assert result["class_ids"] == expected_result["class_ids"] + assert np.allclose( + np.array(result["scores"]), + np.array(expected_result["scores"]), + rtol=1e-2, + atol=1e-3, + ) + assert result["label_names"] == expected_result["label_names"] diff --git a/libs/paddlex-hpi/tests/models/test_image_unwarping.py b/libs/paddlex-hpi/tests/models/test_image_unwarping.py new file mode 100644 index 0000000000..d5fb0e9df9 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_image_unwarping.py @@ -0,0 +1,51 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +from paddlex.inference.results import DocTrResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import WarpPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/warp_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/warp_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/warp_result.json" + + +class TestWarpPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return WarpPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, DocTrResult) + assert set(result) == set(expected_result) + assert np.allclose( + result["doctr_img"], + np.array(expected_result["doctr_img"]), + rtol=1e-2, + atol=2, + ) diff --git a/libs/paddlex-hpi/tests/models/test_instance_segmentation.py b/libs/paddlex-hpi/tests/models/test_instance_segmentation.py new file mode 100644 index 0000000000..faa99bef0f --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_instance_segmentation.py @@ -0,0 +1,54 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddlex.inference.results import InstanceSegResult +from tests.models.base import BaseTestPredictor +from tests.testing_utils.cv import compare_det_results + +from paddlex_hpi.models import InstanceSegPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/instance_seg_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/instance_seg_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/instance_seg_result.json" + + +class TestInstanceSegPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return InstanceSegPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, InstanceSegResult) + assert set(result) == set(expected_result) + # TODO: Check masks + compare_det_results( + [obj["coordinate"] for obj in result["boxes"]], + [obj["coordinate"] for obj in expected_result["boxes"]], + labels1=[obj["cls_id"] for obj in result["boxes"]], + labels2=[obj["cls_id"] for obj in expected_result["boxes"]], + scores1=[obj["score"] for obj in result["boxes"]], + scores2=[obj["score"] for obj in expected_result["boxes"]], + ) diff --git a/libs/paddlex-hpi/tests/models/test_multilabel_classification.py b/libs/paddlex-hpi/tests/models/test_multilabel_classification.py new file mode 100644 index 0000000000..729e95a62b --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_multilabel_classification.py @@ -0,0 +1,53 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +from paddlex.inference.results import MLClassResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import MLClasPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ml_clas_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ml_clas_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ml_clas_result.json" + + +class TestMLClasPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return MLClasPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, MLClassResult) + assert set(result) == set(expected_result) + assert result["class_ids"] == expected_result["class_ids"] + assert np.allclose( + np.array(result["scores"]), + np.array(expected_result["scores"]), + rtol=1e-2, + atol=1e-3, + ) + assert result["label_names"] == expected_result["label_names"] diff --git a/libs/paddlex-hpi/tests/models/test_object_detection.py b/libs/paddlex-hpi/tests/models/test_object_detection.py new file mode 100644 index 0000000000..4da6c8049d --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_object_detection.py @@ -0,0 +1,53 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddlex.inference.results import DetResult +from tests.models.base import BaseTestPredictor +from tests.testing_utils.cv import compare_det_results + +from paddlex_hpi.models import DetPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/det_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/det_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/det_result.json" + + +class TestDetPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return DetPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, DetResult) + assert set(result) == set(expected_result) + compare_det_results( + [obj["coordinate"] for obj in result["boxes"]], + [obj["coordinate"] for obj in expected_result["boxes"]], + labels1=[obj["cls_id"] for obj in result["boxes"]], + labels2=[obj["cls_id"] for obj in expected_result["boxes"]], + scores1=[obj["score"] for obj in result["boxes"]], + scores2=[obj["score"] for obj in expected_result["boxes"]], + ) diff --git a/libs/paddlex-hpi/tests/models/test_semantic_segmentation.py b/libs/paddlex-hpi/tests/models/test_semantic_segmentation.py new file mode 100644 index 0000000000..1954dbf3c1 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_semantic_segmentation.py @@ -0,0 +1,49 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +from paddlex.inference.results import SegResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import SegPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/seg_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/seg_input.png" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/seg_result.json" + + +class TestSegPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return SegPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, SegResult) + assert set(result) == set(expected_result) + pred = result["pred"] + expected_pred = np.array(expected_result["pred"], dtype=np.int32) + assert pred.shape == expected_pred.shape + assert (pred != expected_pred).sum() / pred.size < 0.01 diff --git a/libs/paddlex-hpi/tests/models/test_table_recognition.py b/libs/paddlex-hpi/tests/models/test_table_recognition.py new file mode 100644 index 0000000000..29b8be31ea --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_table_recognition.py @@ -0,0 +1,59 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddlex.inference.results import TableRecResult +from tests.models.base import BaseTestPredictor +from tests.testing_utils.cv import compare_det_results + +from paddlex_hpi.models import TablePredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/table_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/table_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/table_result.json" + + +class TestTablePredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TablePredictor + + def _check_result(self, result, expected_result): + def _unflatten_poly(poly): + return [ + [poly[0], poly[1]], + [poly[2], poly[3]], + [poly[4], poly[5]], + [poly[6], poly[7]], + ] + + assert isinstance(result, TableRecResult) + assert set(result) == set(expected_result) + compare_det_results( + [_unflatten_poly(poly) for poly in result["bbox"]], + [_unflatten_poly(poly) for poly in expected_result["bbox"]], + labels1=result["structure"], + labels2=expected_result["structure"], + ) diff --git a/libs/paddlex-hpi/tests/models/test_text_detection.py b/libs/paddlex-hpi/tests/models/test_text_detection.py new file mode 100644 index 0000000000..9b014b3a38 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_text_detection.py @@ -0,0 +1,47 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddlex.inference.results import TextDetResult +from tests.models.base import BaseTestPredictor +from tests.testing_utils.cv import compare_det_results + +from paddlex_hpi.models import TextDetPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_det_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_det_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_det_result.json" + + +class TestTextDetPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TextDetPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TextDetResult) + assert set(result) == set(expected_result) + compare_det_results(result["dt_polys"], expected_result["dt_polys"]) + # Currently no checks for scores diff --git a/libs/paddlex-hpi/tests/models/test_text_recognition.py b/libs/paddlex-hpi/tests/models/test_text_recognition.py new file mode 100644 index 0000000000..08eb79425a --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_text_recognition.py @@ -0,0 +1,52 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +from paddlex.inference.results import TextRecResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import TextRecPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_rec_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_rec_input.jpg" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/text_rec_result.json" + + +class TestTextRecPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TextRecPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TextRecResult) + assert set(result) == set(expected_result) + assert result["rec_text"] == expected_result["rec_text"] + assert np.allclose( + np.array(result["rec_score"]), + np.array(expected_result["rec_score"]), + rtol=1e-2, + atol=1e-3, + ) diff --git a/libs/paddlex-hpi/tests/models/test_ts_ad.py b/libs/paddlex-hpi/tests/models/test_ts_ad.py new file mode 100644 index 0000000000..760c11ca10 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_ts_ad.py @@ -0,0 +1,49 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from paddlex.inference.results import TSAdResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import TSAdPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_ad_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_ad_input.csv" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_ad_result.json" + + +class TestTSAdPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TSAdPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TSAdResult) + assert set(result) == set(expected_result) + expected_result = json.loads(expected_result["anomaly"]) + result = result["anomaly"].to_dict(orient="records") + assert result == expected_result diff --git a/libs/paddlex-hpi/tests/models/test_ts_cls.py b/libs/paddlex-hpi/tests/models/test_ts_cls.py new file mode 100644 index 0000000000..3cd42a74f6 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_ts_cls.py @@ -0,0 +1,50 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from paddlex.inference.results import TSClsResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import TSClsPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_cls_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_cls_input.csv" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_cls_result.json" + + +class TestTSClsPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TSClsPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TSClsResult) + assert set(result) == set(expected_result) + expected_result = json.loads(expected_result["classification"]) + result = result["classification"].to_dict(orient="records") + assert result[0]["classid"] == expected_result[0]["classid"] + assert round(result[0]["score"], 3) == round(expected_result[0]["score"], 3) diff --git a/libs/paddlex-hpi/tests/models/test_ts_fc.py b/libs/paddlex-hpi/tests/models/test_ts_fc.py new file mode 100644 index 0000000000..8203a5cbe0 --- /dev/null +++ b/libs/paddlex-hpi/tests/models/test_ts_fc.py @@ -0,0 +1,51 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json + +from paddlex.inference.results import TSFcResult +from tests.models.base import BaseTestPredictor + +from paddlex_hpi.models import TSFcPredictor + +MODEL_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_fc_model.zip" +INPUT_DATA_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_fc_input.csv" +EXPECTED_RESULT_URL = "https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/tests/models/ts_fc_result.json" + + +class TestTSFcPredictor(BaseTestPredictor): + @property + def model_url(self): + return MODEL_URL + + @property + def input_data_url(self): + return INPUT_DATA_URL + + @property + def expected_result_url(self): + return EXPECTED_RESULT_URL + + @property + def predictor_cls(self): + return TSFcPredictor + + def _check_result(self, result, expected_result): + assert isinstance(result, TSFcResult) + assert set(result) == set(expected_result) + expected_result = json.loads(expected_result["forecast"]) + expected_result = [{"OT": round(i["OT"], 3)} for i in expected_result] + result = result["forecast"].to_dict(orient="records") + result = [{"OT": round(i["OT"], 3)} for i in result] + assert result == expected_result diff --git a/libs/paddlex-hpi/tests/testing_utils/__init__.py b/libs/paddlex-hpi/tests/testing_utils/__init__.py new file mode 100644 index 0000000000..59372f9379 --- /dev/null +++ b/libs/paddlex-hpi/tests/testing_utils/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/paddlex-hpi/tests/testing_utils/cv.py b/libs/paddlex-hpi/tests/testing_utils/cv.py new file mode 100644 index 0000000000..221ad34345 --- /dev/null +++ b/libs/paddlex-hpi/tests/testing_utils/cv.py @@ -0,0 +1,96 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from shapely.geometry import Polygon + + +def compute_iou(box_or_poly1, box_or_poly2): + if isinstance(box_or_poly1[0], list): + poly1 = box_or_poly1 + poly2 = box_or_poly2 + + poly1 = Polygon(poly1) + poly2 = Polygon(poly2) + + inter_area = poly1.intersection(poly2).area + union_area = poly1.union(poly2).area + + iou = inter_area / (union_area + 1e-9) + + return iou + else: + box1 = box_or_poly1 + box2 = box_or_poly2 + + x11, y11, x12, y12 = box1 + x21, y21, x22, y22 = box2 + + xi1 = max(x11, x21) + yi1 = max(y11, y21) + xi2 = min(x12, x22) + yi2 = min(y12, y22) + + inter_area = max(0, xi2 - xi1 + 1) * max(0, yi2 - yi1 + 1) + box1_area = (x12 - x11 + 1) * (y12 - y11 + 1) + box2_area = (x22 - x21 + 1) * (y22 - y21 + 1) + union_area = box1_area + box2_area - inter_area + + iou = inter_area / (union_area + 1e-9) + + return iou + + +def compare_det_results( + boxes_or_polys1, + boxes_or_polys2, + *, + labels1=None, + labels2=None, + scores1=None, + scores2=None, + iou_tol=0.1, + score_tol=1e-3, +): + compare_labels = labels1 is not None + compare_scores = scores1 is not None + + assert len(boxes_or_polys1) == len(boxes_or_polys2) + if compare_labels: + assert len(labels1) == len(labels2) + if compare_scores: + assert len(scores1) == len(scores2) + + boxes_or_polys2 = boxes_or_polys2.copy() + if labels2 is not None: + labels2 = labels2.copy() + if scores2 is not None: + scores2 = scores2.copy() + for i, box_or_poly1 in enumerate(boxes_or_polys1): + j = 0 + max_iou = 0 + for k, box_or_poly2 in enumerate(boxes_or_polys2): + iou = compute_iou(box_or_poly1, box_or_poly2) + if iou > max_iou: + max_iou = iou + j = k + assert max_iou > 1 - iou_tol + if compare_labels: + assert labels1[i] == labels2[j] + if compare_scores: + assert abs(scores1[i] - scores2[j]) < score_tol + del boxes_or_polys2[j] + if compare_labels: + del labels2[j] + if compare_scores: + del scores2[j] diff --git a/libs/paddlex-hpi/tests/testing_utils/download.py b/libs/paddlex-hpi/tests/testing_utils/download.py new file mode 100644 index 0000000000..e95e46e698 --- /dev/null +++ b/libs/paddlex-hpi/tests/testing_utils/download.py @@ -0,0 +1,107 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import shutil +import tarfile +import tempfile +import zipfile +from pathlib import Path +from urllib.request import urlopen + + +def _download(url, save_path): + with urlopen(url) as r: + with open(save_path, "wb") as file: + shutil.copyfileobj(r, file) + + +def _extract_zip_file(file_path, extd_dir): + with zipfile.ZipFile(file_path, "r") as f: + file_list = f.namelist() + for file in file_list: + f.extract(file, extd_dir) + + +def _extract_tar_file(file_path, extd_dir): + with tarfile.open(file_path, "r:*") as f: + file_list = f.getnames() + for file in file_list: + f.extract(file, extd_dir) + + +def _extract(file_path, extd_dir): + if zipfile.is_zipfile(file_path): + handler = _extract_zip_file + elif tarfile.is_tarfile(file_path): + handler = _extract_tar_file + else: + raise ValueError("Unsupported file format") + handler(file_path, extd_dir) + + +def _remove_if_exists(path): + if path.exists(): + if path.is_dir(): + shutil.rmtree(path) + else: + path.unlink() + + +def download(url, save_path, overwrite=False): + save_path.parent.mkdir(exist_ok=True) + if overwrite: + _remove_if_exists(save_path) + if not save_path.exists(): + _download(url, save_path) + + +def extract(file_path, extd_dir): + return _extract(file_path, extd_dir) + + +def download_and_extract(url, save_dir, dst_name, overwrite=False, no_interm_dir=True): + save_dir = Path(save_dir) + save_dir.mkdir(exist_ok=True) + dst_path = save_dir / dst_name + if overwrite: + _remove_if_exists(dst_path) + + if not dst_path.exists(): + with tempfile.TemporaryDirectory() as td: + td = Path(td) + arc_file_path = td / url.split("/")[-1] + extd_dir = arc_file_path.stem + _download(url, arc_file_path) + tmp_extd_dir = td / "extracted" + _extract(arc_file_path, tmp_extd_dir) + if no_interm_dir: + paths = list(tmp_extd_dir.iterdir()) + if len(paths) == 1: + sp = paths[0] + else: + sp = tmp_extd_dir / dst_name + if not sp.exists(): + raise FileNotFoundError + dp = save_dir / sp.name + if sp.is_dir(): + shutil.copytree(sp, dp) + else: + shutil.copyfile(sp, dp) + extd_file = dp + else: + shutil.copytree(tmp_extd_dir, extd_dir) + extd_file = extd_dir + + if not dst_path.exists() or not extd_file.samefile(dst_path): + shutil.move(extd_file, dst_path) diff --git a/libs/paddlex-hpi/tests/testing_utils/misc.py b/libs/paddlex-hpi/tests/testing_utils/misc.py new file mode 100644 index 0000000000..13d9790c69 --- /dev/null +++ b/libs/paddlex-hpi/tests/testing_utils/misc.py @@ -0,0 +1,19 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from urllib.parse import urlparse + + +def get_filename(url): + return urlparse(url).path.split("/")[-1] diff --git a/libs/ultrainfer/.gitignore b/libs/ultrainfer/.gitignore new file mode 100644 index 0000000000..4742e191f5 --- /dev/null +++ b/libs/ultrainfer/.gitignore @@ -0,0 +1,54 @@ +build +cmake-build-debug +cmake-build-release +.vscode +UltraInfer.cmake +build-debug.sh +*dist +ultrainfer.egg-info +ultrainfer_python.egg-info +ultrainfer_gpu_python.egg-info +.setuptools-cmake-build +ultrainfer/version.py +ultrainfer/core/config.h +python/ultrainfer/c_lib_wrap.py +python/ultrainfer/LICENSE* +python/build_cpu.sh +python/ultrainfer/ThirdPartyNotices* +*.so* +python/ultrainfer/libs/third_libs +ultrainfer/core/config.h +ultrainfer/pybind/main.cc +python/ultrainfer/libs/lib* +python/ultrainfer/libs/third_libs +__pycache__ +python/scripts/process_libraries.py +.vs +.idea +.DS_Store +miniprogram_npm +node_modules +.DS_Store +dist +etc +lib +dist-ssr +coverage +*.local +yalc.* +.yalc +examples/vision/collect_quantize_cc.sh +examples/vision/tests_quantize +ultrainfer/LICENSE +ultrainfer/ThirdPartyNotices.txt +UltraInferCSharp.cmake +python/ultrainfer/code_version.py +*.pdmodel +*.pdiparams +*.pdiparams.info +log.txt +benchmark/paddlex/build +benchmark/cpp/build +!paddlex/paddlex3.0/serving/libs/**/*.so* +TensorRT* +third_party diff --git a/libs/ultrainfer/CMakeLists.txt b/libs/ultrainfer/CMakeLists.txt new file mode 100755 index 0000000000..0f1072a37d --- /dev/null +++ b/libs/ultrainfer/CMakeLists.txt @@ -0,0 +1,735 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +PROJECT(ultrainfer C CXX) +CMAKE_MINIMUM_REQUIRED(VERSION 3.10) + + +option(CSRCS_DIR_NAME "Name of source code directory") +option(LIBRARY_NAME "Name of build library name") +option(PY_LIBRARY_NAME "Name of build python library name") +if(NOT CSRCS_DIR_NAME) + set(CSRCS_DIR_NAME ".") +endif() +if(NOT LIBRARY_NAME) + set(LIBRARY_NAME "ultrainfer") +endif() +if(NOT PY_LIBRARY_NAME) + set(PY_LIBRARY_NAME "ultrainfer_main") +endif() + +include(ExternalProject) +set(THIRD_PARTY_PATH ${CMAKE_CURRENT_BINARY_DIR}/third_libs) +set(THIRD_PARTY_DIR ${PROJECT_SOURCE_DIR}/third_party) + +add_subdirectory(${CSRCS_DIR_NAME}/ultrainfer) +include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake) + +# Set C++11 as standard for the whole project +if(NOT MSVC) + if(NOT DEFINED CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 11) + endif() + set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3") + if(NEED_ABI0) + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) + else() + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1) + endif() +endif(NOT MSVC) + +include(${PROJECT_SOURCE_DIR}/cmake/build_tools.cmake) +if(UNIX AND (NOT APPLE) AND (NOT WITH_TIMVX)) + download_patchelf() + set(PATCHELF_EXE ${THIRD_PARTY_PATH}/patchelf/bin/patchelf) +endif() + + +############################# Basic Options for UltraInfer ################################ +option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu/poros-gpu" OFF) +option(WITH_IPU "Whether WITH_IPU=ON, will enable paddle-infernce-ipu" OFF) +option(WITH_OPENCL "Whether WITH_OPENCL=ON, will enable paddle-lite-gpu" OFF) +option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF) +option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF) +option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF) +option(ENABLE_POROS_BACKEND "Whether to enable poros backend." OFF) +option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF) +option(ENABLE_RKNPU2_BACKEND "Whether to enable RKNPU2 backend." OFF) +option(ENABLE_SOPHGO_BACKEND "Whether to enable SOPHON backend." OFF) +option(ENABLE_TVM_BACKEND "Whether to enable TVM backend." OFF) +option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF) +option(ENABLE_HORIZON_BACKEND "Whether to enable HORIZON backend." OFF) +option(ENABLE_VISION "Whether to enable vision models usage." OFF) +option(ENABLE_TEXT "Whether to enable text models usage." OFF) +option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF) +option(ENABLE_CVCUDA "Whether to enable NVIDIA CV-CUDA to boost image preprocess." OFF) +option(ENABLE_BENCHMARK "Whether to enable Benchmark mode." OFF) +option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF) +option(WITH_DIRECTML "Whether to compile for onnxruntime DirectML deploy." OFF) +option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF) +option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF) +option(WITH_TESTING "Whether to compile with unittest." OFF) +option(WITH_CAPI "Whether to compile with c api." OFF) +option(WITH_CSHARPAPI "Whether to compile with c# api" OFF) + +option(BUILD_EXAMPLES "Whether to build ultrainfer with vision examples" OFF) +option(BUILD_PADDLE2ONNX "Whether to build paddle2onnx from sources" OFF) + +option(BUILD_FD_TRITON_BACKEND "Whether to compile as Triton Inference Server backend." 
OFF) + +######################### Paths to user's custom libraries directory ##################### +set(CUDA_DIRECTORY "" CACHE PATH "If build tensorrt backend, need to define path of cuda library.") +set(TRT_DIRECTORY "" CACHE PATH "If build tensorrt backend, need to define path of tensorrt library.") +set(ORT_DIRECTORY "" CACHE PATH "User can specify the installed onnxruntime directory.") +set(OPENCV_DIRECTORY "" CACHE PATH "User can specify the installed opencv directory.") +set(OPENVINO_DIRECTORY "" CACHE PATH "User can specify the installed openvino directory.") + +# Whether to build ultrainfer on device Nvidia Jetson +# Only support CPU Inference & GPU(TensorRT) Inference Now +option(BUILD_ON_JETSON "Whether to build ultrainfer on Nvidia Jetson" OFF) +if(BUILD_ON_JETSON) + set(WITH_GPU ON) + set(ENABLE_TRT_BACKEND ON) + set(ENABLE_ORT_BACKEND ON) +endif() + +# config GIT_URL with github mirrors to speed up dependent repos clone +option(GIT_URL "Git URL to clone dependent repos" ${GIT_URL}) +if(NOT GIT_URL) + set(GIT_URL "https://github.com") +endif() + +# check build options +include(${PROJECT_SOURCE_DIR}/cmake/check.cmake) + +if(WIN32) + add_definitions(-DYAML_CPP_DLL) + set(YAML_BUILD_SHARED_LIBS ON) + set(YAML_CPP_INSTALL ON) + set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) +endif() + +if(NOT CUDA_DIRECTORY) + set(CUDA_DIRECTORY "/usr/local/cuda") +endif() + +option(BUILD_ULTRAINFER_PYTHON "if build python lib for ultrainfer." OFF) + +set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}") +include_directories(${HEAD_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +if (WITH_TIMVX) + include(${PROJECT_SOURCE_DIR}/cmake/timvx.cmake) +endif() + +if (WITH_ASCEND) + include(${PROJECT_SOURCE_DIR}/cmake/ascend.cmake) +endif() + +if (WITH_KUNLUNXIN) + include(${PROJECT_SOURCE_DIR}/cmake/kunlunxin.cmake) +endif() + +if(WITH_IPU) + if(NOT ENABLE_PADDLE_BACKEND) + message("Will force to set ENABLE_PADDLE_BACKEND when build with GraphCore IPU.") + set(ENABLE_PADDLE_BACKEND ON) + endif() + add_definitions(-DWITH_IPU) +endif() + +# Check for macOS architecture +get_osx_architecture() + +##################################### Building: UltraInfer C++ SDK ####################################### +add_definitions(-DULTRAINFER_LIB) +# set CMAKE_BUILD_TYPE to Release +add_definitions(-DCMAKE_BUILD_TYPE=Release) +# configure files before glob sources. 
+configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/core/config.h) +configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/pybind/main.cc) +file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/*.cc) +file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/ort/*.cc) +file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/paddle/*.cc) +file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/poros/*.cc) +file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/tensorrt/*.cpp) +file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/openvino/*.cc) +file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/rknpu2/*.cc) +file(GLOB_RECURSE DEPLOY_HORIZON_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/horizon/*.cc) +file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/sophgo/*.cc) +file(GLOB_RECURSE DEPLOY_TVM_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/tvm/*.cc) +file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/lite/*.cc) +file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/pipeline/*.cc) +file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/vision/*.cc) +file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/text/*.cc) +file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/*_pybind.cc) +file(GLOB_RECURSE DEPLOY_PADDLE_CUSTOM_OP_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/paddle/ops/*.cc) +if(WITH_GPU) + file(GLOB_RECURSE DEPLOY_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/*.cu) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_CUDA_SRCS}) + file(GLOB_RECURSE DEPLOY_PADDLE_CUSTOM_OP_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/paddle/ops/*.cu) + list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_CUSTOM_OP_CUDA_SRCS}) + file(GLOB_RECURSE DEPLOY_VISION_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/vision/*.cu) + list(APPEND DEPLOY_VISION_SRCS ${DEPLOY_VISION_CUDA_SRCS}) + file(GLOB_RECURSE DEPLOY_TEXT_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/text/*.cu) + list(APPEND DEPLOY_TEXT_SRCS ${DEPLOY_TEXT_CUDA_SRCS}) +endif() +list(REMOVE_ITEM DEPLOY_PADDLE_SRCS ${DEPLOY_PADDLE_CUSTOM_OP_SRCS}) +list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} + ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} + ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} + ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} + ${DEPLOY_PIPELINE_SRCS} ${DEPLOY_RKNPU2_SRCS} + ${DEPLOY_SOPHGO_SRCS} + ${DEPLOY_HORIZON_SRCS} ${DEPLOY_TVM_SRCS} + ${DEPLOY_PADDLE_CUSTOM_OP_SRCS}) + + +set(DEPEND_LIBS "") + +file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" ULTRAINFER_VERSION) +string(STRIP "${ULTRAINFER_VERSION}" ULTRAINFER_VERSION) + 
+# Add eigen lib +download_eigen() +include_directories(${PROJECT_SOURCE_DIR}/third_party/eigen) +if(WIN32) + add_definitions(-DEIGEN_STRONG_INLINE=inline) +endif() + +# sw(sunway) not support thread_local semantic +if(WITH_SW) + add_definitions(-DEIGEN_AVOID_THREAD_LOCAL) +endif() + +if(ENABLE_ORT_BACKEND) + set(ENABLE_PADDLE2ONNX ON) + add_definitions(-DENABLE_ORT_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/onnxruntime.cmake) + list(APPEND DEPEND_LIBS external_onnxruntime) +endif() + +if(ENABLE_LITE_BACKEND) + add_definitions(-DENABLE_LITE_BACKEND) + include(${PROJECT_SOURCE_DIR}/cmake/paddlelite.cmake) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_LITE_SRCS}) + list(APPEND DEPEND_LIBS external_paddle_lite) +endif() + +if(ENABLE_PADDLE_BACKEND) + set(ENABLE_PADDLE2ONNX ON) + add_definitions(-DENABLE_PADDLE_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/paddle_inference.cmake) + list(APPEND DEPEND_LIBS external_paddle_inference) + if(external_dnnl_FOUND) + list(APPEND DEPEND_LIBS external_dnnl external_omp) + endif() + if(external_ort_FOUND) + list(APPEND DEPEND_LIBS external_p2o external_ort) + endif() + if(PADDLEINFERENCE_API_CUSTOM_OP) + set_paddle_custom_ops_compatible_policy() + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_CUSTOM_OP_SRCS}) + if(WITH_GPU) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_CUSTOM_OP_CUDA_SRCS}) + endif() + endif() +endif() + +if(ENABLE_OPENVINO_BACKEND) + set(ENABLE_PADDLE2ONNX ON) + add_definitions(-DENABLE_OPENVINO_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_OPENVINO_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake) +endif() + +if(ENABLE_RKNPU2_BACKEND) + add_definitions(-DENABLE_RKNPU2_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_RKNPU2_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/rknpu2.cmake) + list(APPEND DEPEND_LIBS ${RKNN_RT_LIB}) +endif() + +if(ENABLE_HORIZON_BACKEND) + add_definitions(-DENABLE_HORIZON_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_HORIZON_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/horizon.cmake) + list(APPEND DEPEND_LIBS ${BPU_libs}) +endif() + +if(ENABLE_TVM_BACKEND) + set(CMAKE_CXX_STANDARD 17) + add_definitions(-DENABLE_TVM_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TVM_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/tvm.cmake) + list(APPEND DEPEND_LIBS ${TVM_RUNTIME_LIB}) +endif() + +if(ENABLE_SOPHGO_BACKEND) + add_definitions(-DENABLE_SOPHGO_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_SOPHGO_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/sophgo.cmake) + list(APPEND DEPEND_LIBS ${SOPHGO_RT_LIB}) +endif() + +if(ENABLE_POROS_BACKEND) + set(CMAKE_CXX_STANDARD 14) + add_definitions(-DENABLE_POROS_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_POROS_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/poros.cmake) + list(APPEND DEPEND_LIBS external_poros) + set(PYTHON_MINIMUM_VERSION 3.6) + set(PYTORCH_MINIMUM_VERSION 1.9) + set(TENSORRT_MINIMUM_VERSION 8.0) + # find python3 + find_package(Python3 ${PYTHON_MINIMUM_VERSION} REQUIRED COMPONENTS Interpreter Development) + message(STATUS "Found Python: ${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}.${Python3_VERSION_PATCH}") + + if (NOT Python3_SITELIB) + message(FATAL_ERROR "site-packages not found. 
") + else () + message(STATUS "site-packages: ${Python3_SITELIB}") + endif () + include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/poros/common) + # find trt + if(NOT WITH_GPU) + message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must set -DWITH_GPU=ON, but now it's OFF") + endif() + if(NOT TRT_DIRECTORY) + message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must define -DTRT_DIRECTORY, e.g -DTRT_DIRECTORY=/Downloads/TensorRT-8.4") + endif() + include_directories(${TRT_DIRECTORY}/include) + find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib) + find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib) + find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib) + list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB}) + if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt") + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt") + endif() + if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") + file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") + endif() + find_package(Python COMPONENTS Interpreter Development REQUIRED) + message(STATUS "Copying ${TRT_DIRECTORY}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib ...") + execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib) +endif() + +if(WITH_GPU) + add_definitions(-DWITH_GPU) + include_directories(${CUDA_DIRECTORY}/include) + if(WIN32) + find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib/x64) + add_definitions(-DENABLE_NVJPEG) + else() + find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64) + if(NOT BUILD_ON_JETSON) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib64) + add_definitions(-DENABLE_NVJPEG) + endif() + endif() + list(APPEND DEPEND_LIBS ${CUDA_LIB} ${NVJPEG_LIB}) + + # build CUDA source files in ultrainfer, CUDA source files include CUDA preprocessing, TRT plugins, etc. 
+ enable_language(CUDA) + message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: " + "${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}") + include(${PROJECT_SOURCE_DIR}/cmake/cuda.cmake) +endif() + +if(WITH_OPENCL) + add_definitions(-DWITH_OPENCL) +endif() + +if(ENABLE_TRT_BACKEND) + set(ENABLE_PADDLE2ONNX ON) + if(APPLE OR IOS) + message(FATAL_ERROR "Cannot enable tensorrt backend in mac/ios os, please set -DENABLE_TRT_BACKEND=OFF.") + endif() + if(NOT WITH_GPU) + message(FATAL_ERROR "While -DENABLE_TRT_BACKEND=ON, must set -DWITH_GPU=ON, but now it's OFF") + endif() + if(NOT BUILD_ON_JETSON) + if(NOT TRT_DIRECTORY) + set(TRT_INC_DIR /usr/include/x86_64-linux-gnu/) + set(TRT_LIB_DIR /usr/lib/x86_64-linux-gnu/) + endif() + endif() + if(BUILD_ON_JETSON) + set(TRT_INC_DIR /usr/include/aarch64-linux-gnu/) + set(TRT_LIB_DIR /usr/lib/aarch64-linux-gnu/) + else() + set(TRT_INC_DIR /usr/include/x86_64-linux-gnu/) + set(TRT_LIB_DIR /usr/lib/x86_64-linux-gnu/) + if(TRT_DIRECTORY) + set(TRT_INC_DIR ${TRT_DIRECTORY}/include) + set(TRT_LIB_DIR ${TRT_DIRECTORY}/lib) + endif() + endif() + + add_definitions(-DENABLE_TRT_BACKEND) + include_directories(${TRT_INC_DIR}) + include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/tensorrt/common) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TRT_SRCS}) + find_library(TRT_INFER_LIB nvinfer ${TRT_LIB_DIR} NO_DEFAULT_PATH) + find_library(TRT_ONNX_LIB nvonnxparser ${TRT_LIB_DIR} NO_DEFAULT_PATH) + find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_LIB_DIR} NO_DEFAULT_PATH) + list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB}) + + if(NOT BUILD_ON_JETSON AND TRT_DIRECTORY) + if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt") + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt") + endif() + if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") + file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") + endif() + + if (NOT Python_EXECUTABLE) + find_package(Python COMPONENTS Interpreter Development REQUIRED) + endif() + + message(STATUS "Copying ${TRT_DIRECTORY}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib ...") + execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib) + file(GLOB_RECURSE TRT_STATIC_LIBS ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib/*.a) + if(TRT_STATIC_LIBS) + file(REMOVE ${TRT_STATIC_LIBS}) + endif() + if(UNIX AND (NOT APPLE)) + execute_process(COMMAND sh -c "ls *.so*" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib + COMMAND sh -c "xargs ${PATCHELF_EXE} --force-rpath --set-rpath '$ORIGIN'" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib + RESULT_VARIABLE result + OUTPUT_VARIABLE curr_out + ERROR_VARIABLE curr_out) + if(ret EQUAL "1") + message(FATAL_ERROR "Failed to patchelf tensorrt libraries.") + endif() + message(STATUS "result:${result} out:${curr_out}") + endif() + endif() +endif() + +if(ENABLE_VISION) + add_definitions(-DENABLE_VISION) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_VISION_SRCS}) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PIPELINE_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/opencv.cmake) + + if(ENABLE_FLYCV) + add_definitions(-DENABLE_FLYCV) + include(${PROJECT_SOURCE_DIR}/cmake/flycv.cmake) + list(APPEND DEPEND_LIBS ${FLYCV_LIBRARIES}) + endif() + + if(ENABLE_CVCUDA) + 
include(${PROJECT_SOURCE_DIR}/cmake/cvcuda.cmake) + add_definitions(-DENABLE_CVCUDA) + list(APPEND DEPEND_LIBS nvcv_types cvcuda) + endif() +endif() + +download_yaml_cpp() +add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp) +list(APPEND DEPEND_LIBS yaml-cpp) +include_directories(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp/include) + +if(ENABLE_TEXT) + add_definitions(-DENABLE_TEXT) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TEXT_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/fast_tokenizer.cmake) +endif() + +if(ENABLE_PADDLE2ONNX) + add_definitions(-DENABLE_PADDLE2ONNX) + if(BUILD_PADDLE2ONNX) + download_protobuf() + download_onnx() + download_optimizer() + include(${PROJECT_SOURCE_DIR}/cmake/build_paddle2onnx.cmake) + list(APPEND ALL_DEPLOY_SRCS ${PADDLE2ONNX_ALL_SRCS}) + list(APPEND DEPEND_LIBS p2o_paddle_proto onnx) + else() + include(${PROJECT_SOURCE_DIR}/cmake/paddle2onnx.cmake) + list(APPEND DEPEND_LIBS external_paddle2onnx) + endif() +endif(ENABLE_PADDLE2ONNX) + +if(WITH_CAPI) + include(${PROJECT_SOURCE_DIR}/c_api/CMakeLists.txt) + if(MSVC) + add_definitions(-DFD_CAPI) + endif() +endif() + +if(WITH_CSHARPAPI) + if(MSVC) + add_subdirectory(${PROJECT_SOURCE_DIR}/csharp) + endif() +endif() + +configure_file(${PROJECT_SOURCE_DIR}/UltraInfer.cmake.in ${PROJECT_SOURCE_DIR}/UltraInfer.cmake @ONLY) +configure_file(${PROJECT_SOURCE_DIR}/UltraInferCSharp.cmake.in ${PROJECT_SOURCE_DIR}/UltraInferCSharp.cmake @ONLY) +if(BUILD_FD_TRITON_BACKEND) + configure_file(${PROJECT_SOURCE_DIR}/python/ultrainfer/c_lib_wrap.py.in ${PROJECT_SOURCE_DIR}/python/ultrainfer/c_lib_wrap.py) +else() + configure_file(${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/c_lib_wrap.py.in ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/c_lib_wrap.py) +endif() +configure_file(${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py.in ${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py) + +list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_PYBIND_SRCS}) + +add_library(${LIBRARY_NAME} SHARED ${ALL_DEPLOY_SRCS}) + +redefine_file_macro(${LIBRARY_NAME}) + +file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" ULTRAINFER_VERSION) +string(STRIP "${ULTRAINFER_VERSION}" ULTRAINFER_VERSION) +if (APPLE) + set_target_properties(${LIBRARY_NAME} PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") +elseif(MSVC) +else() + if(WITH_GPU) + set_target_properties(${LIBRARY_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + set_target_properties(${LIBRARY_NAME} PROPERTIES INTERFACE_COMPILE_OPTIONS + "$<$>:-fvisibility=hidden>$<$>:-Xcompiler=-fvisibility=hidden>") + else() + set_target_properties(${LIBRARY_NAME} PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") + endif() + set_target_properties(${LIBRARY_NAME} PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL") + set_target_properties(${LIBRARY_NAME} PROPERTIES LINK_FLAGS_RELEASE -s) +endif() + +set_target_properties(${LIBRARY_NAME} PROPERTIES VERSION ${ULTRAINFER_VERSION}) +if(MSVC) + # disable warnings for dll export + target_compile_options(${LIBRARY_NAME} PRIVATE "$<$>:/wd4251>$<$>:-Xcompiler=/wd4251>") + file(GLOB FD_FILES_REQUIRE_BIGOBJ ${CSRCS_DIR_NAME}/ultrainfer/function/reduce.cc) + set_source_files_properties(${FD_FILES_REQUIRE_BIGOBJ} PROPERTIES COMPILE_FLAGS "/bigobj") +endif() + +target_link_libraries(${LIBRARY_NAME} ${DEPEND_LIBS}) + +##################################### Examples #################################### +if(WIN32) + if("${CMAKE_GENERATOR}" STREQUAL "Ninja") + add_custom_target(copy_yaml_library ALL COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_BINARY_DIR}/third_party/yaml-cpp 
${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/yaml-cpp/lib DEPENDS ${LIBRARY_NAME}) + else() + add_custom_target(copy_yaml_library ALL COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_BINARY_DIR}/third_party/yaml-cpp/Release ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/yaml-cpp/lib DEPENDS ${LIBRARY_NAME}) + add_custom_target(copy_yaml_include ALL COMMAND ${CMAKE_COMMAND} -E copy_directory ${PROJECT_SOURCE_DIR}/third_party/yaml-cpp/include ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/yaml-cpp/include DEPENDS ${LIBRARY_NAME}) + endif() +endif() + +# add examples after prepare include paths for third-parties +if(BUILD_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) + add_definitions(-DBUILD_EXAMPLES) + if(NOT EXECUTABLE_OUTPUT_PATH STREQUAL ${CMAKE_CURRENT_BINARY_DIR}/bin) + set(EXECUTABLE_OUTPUT_PATH ${CMAKE_CURRENT_BINARY_DIR}/bin) + endif() + include(${PROJECT_SOURCE_DIR}/cmake/gflags.cmake) + add_subdirectory(examples) +endif() + +if (WITH_TESTING AND EXISTS ${PROJECT_SOURCE_DIR}/tests) + add_definitions(-DWITH_TESTING) + include(${PROJECT_SOURCE_DIR}/cmake/gtest.cmake) + if(NOT BUILD_EXAMPLES) + include(${PROJECT_SOURCE_DIR}/cmake/gflags.cmake) + endif() + include(${PROJECT_SOURCE_DIR}/cmake/glog.cmake) + add_subdirectory(tests) +endif() + +include(${PROJECT_SOURCE_DIR}/cmake/summary.cmake) +ultrainfer_summary() + +################################ Installation: UltraInfer C++ SDK ############################### +if(WIN32) + install( + TARGETS ${LIBRARY_NAME} + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + RUNTIME DESTINATION lib + ) +else() + install( + TARGETS ${LIBRARY_NAME} + LIBRARY DESTINATION lib) +endif() + +install( + DIRECTORY ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer + DESTINATION ${CMAKE_INSTALL_PREFIX}/include + FILES_MATCHING + PATTERN "*.h" + PATTERN "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/runtime/backends/*/*.h" +) + +install( + DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install + DESTINATION ${CMAKE_INSTALL_PREFIX}/third_libs +) + +install( + FILES + ${PROJECT_SOURCE_DIR}/LICENSE + ${PROJECT_SOURCE_DIR}/ThirdPartyNotices.txt + ${PROJECT_SOURCE_DIR}/VERSION_NUMBER + ${PROJECT_SOURCE_DIR}/UltraInfer.cmake + ${PROJECT_SOURCE_DIR}/UltraInferCSharp.cmake + ${PROJECT_SOURCE_DIR}/cmake/UltraInferConfig.cmake + ${PROJECT_SOURCE_DIR}/cmake/utils.cmake + ${PROJECT_SOURCE_DIR}/cmake/summary.cmake + DESTINATION ${CMAKE_INSTALL_PREFIX} +) + +install( + FILES ${PROJECT_SOURCE_DIR}/cmake/gflags.cmake + DESTINATION ${CMAKE_INSTALL_PREFIX}/utils +) + +if(NOT WIN32) + install( + FILES ${PROJECT_SOURCE_DIR}/scripts/ultrainfer_init.sh + DESTINATION ${CMAKE_INSTALL_PREFIX} + ) +else() + install( + FILES ${PROJECT_SOURCE_DIR}/scripts/ultrainfer_init.bat + DESTINATION ${CMAKE_INSTALL_PREFIX} + ) +endif() + +if(WITH_ASCEND) + install( + FILES ${PROJECT_SOURCE_DIR}/scripts/ascend_init.sh + DESTINATION ${CMAKE_INSTALL_PREFIX} + ) +endif() + +if(WITH_CAPI) + install( + DIRECTORY ${PROJECT_SOURCE_DIR}/c_api/ultrainfer_capi + DESTINATION ${CMAKE_INSTALL_PREFIX}/include + FILES_MATCHING + PATTERN "*.h" + PATTERN "*/types_internal.h" EXCLUDE + ) +endif() + +include(${PROJECT_SOURCE_DIR}/cmake/config_cpack.cmake) + +if(WIN32 AND BUILD_EXAMPLES) + get_windows_path(_tmp_install_dir ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install) + get_windows_path(_publish_exe_dir ${EXECUTABLE_OUTPUT_PATH}/Release) + list(GET CMAKE_CONFIGURATION_TYPES 0 _CONFIG_TYPE) + if((${CMAKE_BUILD_TYPE} MATCHES "Release") OR (${_CONFIG_TYPE} MATCHES "Release")) + install(TARGETS 
${LIBRARY_NAME} RUNTIME DESTINATION ${EXECUTABLE_OUTPUT_PATH}/Release) + add_custom_target( + copy_fd_third_dlls_examples ALL COMMAND + cmd /C ${PROJECT_SOURCE_DIR}/scripts/ultrainfer_init.bat install ${_tmp_install_dir} ${_publish_exe_dir} noconfirm) + add_dependencies(copy_fd_third_dlls_examples ${LIBRARY_NAME} copy_yaml_library) + endif() +endif() + +############################### Building: UltraInfer Python Wheel ############################# +if(BUILD_ULTRAINFER_PYTHON) + add_definitions(-DBUILD_ULTRAINFER_PYTHON) + if("${PY_EXT_SUFFIX}" STREQUAL "") + if(MSVC) + set(PY_EXT_SUFFIX ".pyd") + else() + set(PY_EXT_SUFFIX ".so") + endif() + endif() + + # find_package Python has replaced PythonInterp and PythonLibs since cmake 3.12 + # Use the following command in the future; now this is only compatible with the latest pybind11 + # find_package(Python ${PY_VERSION} COMPONENTS Interpreter Development REQUIRED) + find_package(PythonInterp ${PY_VERSION} REQUIRED) + find_package(PythonLibs ${PY_VERSION}) + if(CMAKE_SYSTEM_NAME STREQUAL "AIX") + set(CMAKE_NO_SYSTEM_FROM_IMPORTED 1) + endif() + + if(NOT ENABLE_VISION) + file(GLOB_RECURSE VISION_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/vision/*_pybind.cc) + file(GLOB_RECURSE PIPELINE_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/pipeline/*_pybind.cc) + list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${VISION_PYBIND_SRCS} ${PIPELINE_PYBIND_SRCS}) + endif() + + if (NOT ENABLE_TEXT) + file(GLOB_RECURSE TEXT_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultrainfer/text/*_pybind.cc) + list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${TEXT_PYBIND_SRCS}) + endif() + + add_library(${PY_LIBRARY_NAME} MODULE ${DEPLOY_PYBIND_SRCS}) + redefine_file_macro(${PY_LIBRARY_NAME}) + set_target_properties(${PY_LIBRARY_NAME} PROPERTIES PREFIX "") + set_target_properties(${PY_LIBRARY_NAME} + PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") + set_target_properties(${PY_LIBRARY_NAME} PROPERTIES SUFFIX ${PY_EXT_SUFFIX}) + set_target_properties(${PY_LIBRARY_NAME} + PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + target_include_directories(${PY_LIBRARY_NAME} PRIVATE + $ + $ + ${PYTHON_INCLUDE_DIR}) + + download_pybind() + target_include_directories(${PY_LIBRARY_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/third_party/pybind11/include) + download_dlpack() + target_include_directories(${PY_LIBRARY_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/third_party/dlpack/include) + + if(APPLE) + set_target_properties(${PY_LIBRARY_NAME} + PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") + endif() + + target_link_libraries(${PY_LIBRARY_NAME} PUBLIC ${LIBRARY_NAME}) + + if(MSVC) + target_link_libraries(${PY_LIBRARY_NAME} PRIVATE ${PYTHON_LIBRARIES}) + target_compile_options(${PY_LIBRARY_NAME} + PRIVATE /MP + /wd4244 # 'argument': conversion from 'google:: + # protobuf::uint64' to 'int', possible + # loss of data + /wd4267 # Conversion from 'size_t' to 'int', + # possible loss of data + /wd4996 # The second parameter is ignored. 
+ ${EXTRA_FLAGS}) + target_compile_options(${PY_LIBRARY_NAME} PRIVATE $<$>:/MT> $<$:/MTd>) + endif() + + file(REMOVE_RECURSE ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs) + file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs) + + if(WIN32) + add_custom_target(copy_fd_libraries ALL COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_BINARY_DIR}/Release ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs/ DEPENDS ${PY_LIBRARY_NAME}) + elseif(APPLE) + add_custom_target(copy_fd_libraries ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/*.so** ${CMAKE_CURRENT_BINARY_DIR}/*.dylib** ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs/ DEPENDS ${PY_LIBRARY_NAME}) + else() + add_custom_target(copy_fd_libraries ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/*.so* ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs/ DEPENDS ${PY_LIBRARY_NAME}) + endif() + add_custom_target(copy_third_libraries ALL COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install ${PROJECT_SOURCE_DIR}/python/${LIBRARY_NAME}/libs/third_libs DEPENDS ${PY_LIBRARY_NAME}) +endif(BUILD_ULTRAINFER_PYTHON) + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.4.0") + string(STRIP "${CMAKE_CXX_COMPILER_VERSION}" CMAKE_CXX_COMPILER_VERSION) + message(FATAL_ERROR "[ERROR] UltraInfer require g++ version >= 5.4.0, but now your g++ version is ${CMAKE_CXX_COMPILER_VERSION}, this may cause failure! Use -DCMAKE_CXX_COMPILER to define path of your compiler.") + endif() +endif() diff --git a/libs/ultrainfer/LICENSE b/libs/ultrainfer/LICENSE new file mode 100755 index 0000000000..261eeb9e9f --- /dev/null +++ b/libs/ultrainfer/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
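(Aside, not part of the patch: a minimal sketch of how the switches referenced in the CMake code above might be exercised when configuring a build. The option names — ENABLE_PADDLE2ONNX, ENABLE_TEXT, WITH_GPU, BUILD_ULTRAINFER_PYTHON, PY_VERSION — are taken from the conditions shown in the CMakeLists.txt diff, but their defaults, any additional toolchain requirements, and whether the wheel is normally produced through python/setup.py rather than a direct cmake invocation are assumptions; treat the paths and values below as placeholders, not the project's documented build procedure.)

    # Hypothetical configure/build/install of the C++ SDK (paths and flag values are illustrative).
    cmake -S libs/ultrainfer -B build \
          -DENABLE_PADDLE2ONNX=ON \
          -DENABLE_TEXT=OFF \
          -DWITH_GPU=OFF \
          -DCMAKE_INSTALL_PREFIX=$(pwd)/ultrainfer_install
    cmake --build build -j 8
    cmake --install build

    # Hypothetical configure for the pybind11 module target guarded by BUILD_ULTRAINFER_PYTHON;
    # PY_VERSION is forwarded to find_package(PythonInterp/PythonLibs) as in the code above.
    cmake -S libs/ultrainfer -B build_py \
          -DBUILD_ULTRAINFER_PYTHON=ON \
          -DPY_VERSION=3.10
    cmake --build build_py -j 8

On Windows the copy_fd_libraries / copy_fd_third_dlls_examples targets shown above only stage DLLs for Release configurations, so a Release build type would be assumed for the sketch there.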
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/libs/ultrainfer/ThirdPartyNotices.txt b/libs/ultrainfer/ThirdPartyNotices.txt new file mode 100755 index 0000000000..35f6dddac8 --- /dev/null +++ b/libs/ultrainfer/ThirdPartyNotices.txt @@ -0,0 +1,1946 @@ +This project depends on some open source projects, list as below + +-------- +1. https://github.com/protocolbuffers/protobuf + +Copyright 2008 Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. 
nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. + +-------- +2. https://github.com/onnx/onnx + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +3. https://github.com/microsoft/onnxruntime + +MIT License + +Copyright (c) Microsoft Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------- +4. https://github.com/pybind/pybind11 + +Copyright (c) 2016 Wenzel Jakob , All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of +external contributions to this project including patches, pull requests, etc. + +-------- +5. https://github.com/onnx/onnx-tensorrt + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2021 NVIDIA Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +6. https://github.com/opencv/opencv + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +7. https://github.com/jbeder/yaml-cpp + +Copyright (c) 2008-2015 Jesse Beder. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +--------- +8. https://github.com/oneapi-src/oneDNN/ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + ============================================================================ + + Copyright 2016-2021 Intel Corporation + Copyright 2018 YANDEX LLC + Copyright 2019-2021 FUJITSU LIMITED + Copyright 2020 Arm Limited and affiliates + Copyright 2020 Codeplay Software Limited + Copyright 2021 Alanna Tempest + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + This distribution includes third party software ("third party programs"). + This third party software, even if included with the distribution of + the Intel software, may be governed by separate license terms, including + without limitation, third party license terms, other Intel software license + terms, and open source software license terms. These separate license terms + govern your use of the third party programs as set forth in the + "THIRD-PARTY-PROGRAMS" file. + +-------- +9. https://github.com/openvinotoolkit/openvino + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +10. 
https://gitlab.com/libeigen/eigen + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +11. https://github.com/PaddlePaddle/PaddleNLP + +Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +12. https://github.com/openssl/openssl + + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + +-------- +13. https://github.com/dmlc/dlpack + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2017 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/libs/ultrainfer/UltraInfer.cmake.in b/libs/ultrainfer/UltraInfer.cmake.in new file mode 100755 index 0000000000..7c05344958 --- /dev/null +++ b/libs/ultrainfer/UltraInfer.cmake.in @@ -0,0 +1,335 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 3.8) + +# UltraInfer basic infos +set(ULTRAINFER_VERSION @ULTRAINFER_VERSION@) +set(LIBRARY_NAME @LIBRARY_NAME@) + +# If compile with GLIBC_CXX_ABI=0 +set(NEED_ABI0 @NEED_ABI0@) + +# Hardware and Language API +set(WITH_GPU @WITH_GPU@) +set(WITH_IPU @WITH_IPU@) +set(WITH_OPENCL @WITH_OPENCL@) +set(WITH_ASCEND @WITH_ASCEND@) +set(WITH_DIRECTML @WITH_DIRECTML@) +set(WITH_TIMVX @WITH_TIMVX@) +set(WITH_KUNLUNXIN @WITH_KUNLUNXIN@) +set(WITH_CAPI @WITH_CAPI@) +set(WITH_CSHARPAPI @WITH_CSHARPAPI@) +set(WITH_TESTING @WITH_TESTING@) +set(BUILD_ON_JETSON @BUILD_ON_JETSON@) +set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@") + +# Inference backend and UltraInfer Moudle +set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@) +set(ENABLE_RKNPU2_BACKEND @ENABLE_RKNPU2_BACKEND@) +set(ENABLE_TVM_BACKEND @ENABLE_TVM_BACKEND@) +set(ENABLE_HORIZON_BACKEND @ENABLE_HORIZON_BACKEND@) +set(ENABLE_SOPHGO_BACKEND @ENABLE_SOPHGO_BACKEND@) +set(ENABLE_LITE_BACKEND @ENABLE_LITE_BACKEND@) +set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@) +set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@) +set(ENABLE_POROS_BACKEND @ENABLE_POROS_BACKEND@) +set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@) +set(ENABLE_PADDLE2ONNX @ENABLE_PADDLE2ONNX@) +set(BUILD_PADDLE2ONNX @BUILD_PADDLE2ONNX@) + +set(ENABLE_VISION @ENABLE_VISION@) +set(ENABLE_FLYCV @ENABLE_FLYCV@) +set(ENABLE_CVCUDA @ENABLE_CVCUDA@) +set(ENABLE_TEXT @ENABLE_TEXT@) +set(ENABLE_BENCHMARK @ENABLE_BENCHMARK@) + +# Version infos and custom settings for third libs +set(PADDLEINFERENCE_VERSION @PADDLEINFERENCE_VERSION@) +set(POROS_VERSION @POROS_VERSION@) +set(OPENVINO_VERSION @OPENVINO_VERSION@) +set(OPENCV_FILENAME @OPENCV_FILENAME@) +set(OPENVINO_FILENAME @OPENVINO_FILENAME@) +set(PADDLELITE_FILENAME @PADDLELITE_FILENAME@) +set(OPENCV_DIRECTORY "@OPENCV_DIRECTORY@") +set(ORT_DIRECTORY "@ORT_DIRECTORY@") +set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@") + +set(ULTRAINFER_LIBS "") +set(ULTRAINFER_INCS "") +list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/include) + +# Note(zhoushunjie): include some useful utils function +include(${CMAKE_CURRENT_LIST_DIR}/utils.cmake) + +# Set C++11 as standard for the whole project +if(NOT MSVC) + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_FLAGS "-Wno-format") + if(NEED_ABI0) + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) + else() + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1) + endif() +endif(NOT MSVC) + +# Still need omp while using UltraInfer static lib. +# This is due to the use of openmp for Paddle Lite's +# static library. 
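+
+# Illustrative usage sketch (not executed here; assumptions noted inline): a
+# downstream project would normally consume the variables assembled in this
+# file through the find_package entry point provided by UltraInferConfig.cmake,
+# roughly as follows. The target name "demo" and the source file main.cc are
+# placeholders for illustration only.
+#
+#   cmake_minimum_required(VERSION 3.8)
+#   project(demo)
+#   find_package(UltraInfer REQUIRED)             # resolved via -DUltraInfer_DIR=/path/to/install
+#   include_directories(${UltraInfer_INCLUDE_DIRS})
+#   add_executable(demo main.cc)
+#   target_link_libraries(demo ${UltraInfer_LIBS})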
+ +find_library(FDLIB ${LIBRARY_NAME} ${CMAKE_CURRENT_LIST_DIR}/lib NO_DEFAULT_PATH) +list(APPEND ULTRAINFER_LIBS ${FDLIB}) + +if(ENABLE_ORT_BACKEND) + if (ORT_DIRECTORY) + set(ORT_LIB_PATH ${ORT_DIRECTORY}/lib) + else() + set(ORT_LIB_PATH ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/onnxruntime/lib) + endif() + message(STATUS "The path of ONNXRuntime is ${ORT_LIB_PATH}.") + find_library(ORT_LIB onnxruntime ${ORT_LIB_PATH} NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${ORT_LIB}) +endif() + +if(ENABLE_TVM_BACKEND) + if(APPLE) + set(TVM_RUNTIME_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tvm/lib/libtvm_runtime.dylib) + else() + set(TVM_RUNTIME_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tvm/lib/libtvm_runtime.so) + endif() + list(APPEND ULTRAINFER_LIBS ${TVM_RUNTIME_LIB}) +endif() + +if(ENABLE_PADDLE_BACKEND) + string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ "${PADDLEINFERENCE_VERSION}") + set(PADDLEINFERENCE_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(PADDLEINFERENCE_VERSION_MINOR "${CMAKE_MATCH_2}") + set(PADDLEINFERENCE_VERSION_PATCH "${CMAKE_MATCH_3}") + find_library(PADDLE_LIB paddle_inference ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/paddle/lib NO_DEFAULT_PATH) + if(WIN32) + if(PADDLEINFERENCE_VERSION_MAJOR EQUAL 2) + set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mkldnn/lib/mkldnn.lib") + else() + set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/onednn/lib/dnnl.lib") + endif() + set(IOMP_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mklml/lib/libiomp5md.lib") + elseif(APPLE) + message(STATUS "No third parties libs(mkldnn and omp) need to link into paddle_inference on MacOS OSX.") + else() + if(PADDLEINFERENCE_VERSION_MAJOR EQUAL 2) + set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mkldnn/lib/libmkldnn.so.0") + else() + set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/onednn/lib/libdnnl.so.3") + endif() + set(IOMP_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mklml/lib/libiomp5.so") + endif() + list(APPEND ULTRAINFER_LIBS ${PADDLE_LIB}) + if(EXISTS "${DNNL_LIB}") + list(APPEND ULTRAINFER_LIBS ${DNNL_LIB} ${IOMP_LIB}) + endif() +endif() + +if(ENABLE_OPENVINO_BACKEND) + if (OPENVINO_DIRECTORY) + set(OPENVINO_DIR ${OPENVINO_DIRECTORY}) + else() + set(OPENVINO_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/${OPENVINO_FILENAME}/runtime) + endif() + get_openvino_libs(${OPENVINO_DIR}) + message(STATUS "OPENVINO_LIBS = ${OPENVINO_LIBS}") + list(APPEND ULTRAINFER_LIBS ${OPENVINO_LIBS}) +endif() + +if(ENABLE_RKNPU2_BACKEND) + if(RKNN2_TARGET_SOC STREQUAL "RK356X") + set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so) + elseif (RKNN2_TARGET_SOC STREQUAL "RK3588") + set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so) + else () + message(FATAL_ERROR "RKNN2_TARGET_SOC is not set, ref value: RK356X or RK3588") + endif() + message(STATUS "The path of RKNPU2 is ${RKNPU2_LIB}.") + list(APPEND ULTRAINFER_LIBS ${RKNPU2_LIB}) +endif() + +if(ENABLE_HORIZON_BACKEND) + set(DNN_PATH ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/dnn) + set(APPSDK_PATH ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/appsdk/appuser/) + + set(DNN_LIB_PATH ${DNN_PATH}/lib) + set(APPSDK_LIB_PATH ${APPSDK_PATH}/lib/hbbpu) + set(BPU_libs dnn cnn_intf 
hbrt_bernoulli_aarch64) + + link_directories(${DNN_LIB_PATH} + ${APPSDK_PATH}/lib/hbbpu + ${APPSDK_PATH}/lib) + + list(APPEND ULTRAINFER_LIBS ${BPU_libs}) + +endif() +if(ENABLE_LITE_BACKEND) + set(LITE_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/${PADDLELITE_FILENAME}) + # Linux/Mac/Win/... + find_library(LITE_LIB paddle_full_api_shared ${LITE_DIR}/lib NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${LITE_LIB}) +endif() + +if(ENABLE_POROS_BACKEND) + find_library(POROS_LIB poros ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/lib NO_DEFAULT_PATH) + find_library(TORCH_LIB torch ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/torch/lib NO_DEFAULT_PATH) + set(TORCH_INCLUDE "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/torch/include") + list(APPEND ULTRAINFER_LIBS ${POROS_LIB} ${TORCH_LIB}) + list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/include ${TORCH_INCLUDE}) +endif() + +if(WITH_GPU) + if(NOT CUDA_DIRECTORY) + set(CUDA_DIRECTORY "/usr/local/cuda") + endif() + if(WIN32) + find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib/x64) + else() + find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64) + if(NOT BUILD_ON_JETSON) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib64) + endif() + endif() + if(NOT CUDA_LIB) + message(FATAL_ERROR "[UltraInfer] Cannot find library cudart in ${CUDA_DIRECTORY}, Please define CUDA_DIRECTORY, e.g -DCUDA_DIRECTORY=/path/to/cuda") + endif() + list(APPEND ULTRAINFER_LIBS ${CUDA_LIB} ${NVJPEG_LIB}) + list(APPEND ULTRAINFER_INCS ${CUDA_DIRECTORY}/include) + + if(ENABLE_TRT_BACKEND) + if(BUILD_ON_JETSON) + find_library(TRT_INFER_LIB nvinfer /usr/lib/aarch64-linux-gnu/) + find_library(TRT_ONNX_LIB nvonnxparser /usr/lib/aarch64-linux-gnu/) + find_library(TRT_PLUGIN_LIB nvinfer_plugin /usr/lib/aarch64-linux-gnu/) + else() + if(EXISTS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/) + find_library(TRT_INFER_LIB nvinfer ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib NO_DEFAULT_PATH) + find_library(TRT_ONNX_LIB nvonnxparser ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib NO_DEFAULT_PATH) + find_library(TRT_PLUGIN_LIB nvinfer_plugin ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib NO_DEFAULT_PATH) + else() + find_library(TRT_INFER_LIB nvinfer /usr/lib/x86_64-linux-gnu/) + find_library(TRT_ONNX_LIB nvonnxparser /usr/lib/x86_64-linux-gnu/) + find_library(TRT_PLUGIN_LIB nvinfer_plugin /usr/lib/x86_64-linux-gnu/) + endif() + endif() + list(APPEND ULTRAINFER_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB}) + endif() +endif() + +if(ENABLE_VISION) + if(OPENCV_DIRECTORY) + set(OpenCV_DIR ${OPENCV_DIRECTORY}) + else() + set(OpenCV_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/${OPENCV_FILENAME}) + if(WIN32) + set(OpenCV_DIR ${OpenCV_DIR}/build) + endif() + endif() + message(STATUS "The path of OpenCV is ${OpenCV_DIR}.") + + # Win/Linux/Mac + find_package(OpenCV REQUIRED PATHS ${OpenCV_DIR} NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_INCS ${OpenCV_INCLUDE_DIRS}) + list(APPEND ULTRAINFER_LIBS ${OpenCV_LIBS}) + + if(ENABLE_FLYCV) + include_directories(${CMAKE_CURRENT_LIST_DIR}/third_libs/install/flycv/include) + set(FLYCV_LIB_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/flycv/lib) + + find_library(FLYCV_LIB flycv_shared ${FLYCV_LIB_DIR} NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${FLYCV_LIB}) + endif() + + if(ENABLE_CVCUDA) + find_library(CVCUDA_LIB cvcuda ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/lib NO_DEFAULT_PATH) + 
find_library(NVCV_TYPES_LIB nvcv_types ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/lib NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${CVCUDA_LIB} ${NVCV_TYPES_LIB}) + list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/include NO_DEFAULT_PATH) + add_definitions(-DENABLE_CVCUDA) + endif() + +endif() + +if (ENABLE_TEXT) + # Add dependency libs later: Linux/Mac/Win/... + find_library(FAST_TOKENIZER_LIB core_tokenizers ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/lib NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${FAST_TOKENIZER_LIB}) + + list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/include) + list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/third_party/include) +endif() + +if(ENABLE_PADDLE2ONNX) + if(NOT BUILD_PADDLE2ONNX) + find_library(PADDLE2ONNX_LIB paddle2onnx ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle2onnx/lib NO_DEFAULT_PATH) + list(APPEND ULTRAINFER_LIBS ${PADDLE2ONNX_LIB}) + endif() +endif() + +if(WITH_KUNLUNXIN) + list(APPEND ULTRAINFER_LIBS -lpthread -lrt -ldl) +endif() + +remove_duplicate_libraries(ULTRAINFER_LIBS) + +include(${CMAKE_CURRENT_LIST_DIR}/summary.cmake) +ultrainfer_summary() +message(STATUS " DEPENDENCY_LIBS : ${ULTRAINFER_LIBS}") + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.4.0") + string(STRIP "${CMAKE_CXX_COMPILER_VERSION}" CMAKE_CXX_COMPILER_VERSION) + message(FATAL_ERROR "[ERROR] UltraInfer require g++ version >= 5.4.0, but now your g++ version is ${CMAKE_CXX_COMPILER_VERSION}, this may cause failure! Use -DCMAKE_CXX_COMPILER to define path of your compiler.") + endif() +endif() + +function(install_ultrainfer_libraries DESTINATION_DIR) + set(DYN_LIB_SUFFIX "*.so*") + if(WIN32) + set(DYN_LIB_SUFFIX "*.dll") + elseif(APPLE) + set(DYN_LIB_SUFFIX "*.dylib*") + endif() + if(UltraInfer_DIR) + set(DYN_SEARCH_DIR ${UltraInfer_DIR}) + elseif(ULTRAINFER_INSTALL_DIR) + set(DYN_SEARCH_DIR ${ULTRAINFER_INSTALL_DIR}) + else() + message(FATAL_ERROR "Please set UltraInfer_DIR/ULTRAINFER_INSTALL_DIR before call install_ultrainfer_libraries.") + endif() + file(GLOB_RECURSE ALL_NEED_DYN_LIBS ${DYN_SEARCH_DIR}/lib/${DYN_LIB_SUFFIX}) + file(GLOB_RECURSE ALL_DEPS_DYN_LIBS ${DYN_SEARCH_DIR}/third_libs/${DYN_LIB_SUFFIX}) + + if(ENABLE_VISION) + # OpenCV + file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_DIR}/${DYN_LIB_SUFFIX}) + list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_OPENCV_DYN_LIBS}) + + if(WIN32) + file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/x64/vc15/bin/${DYN_LIB_SUFFIX}) + file(INSTALL ${OPENCV_DYN_LIBS} DESTINATION ${DESTINATION_DIR}) + else() # linux/mac + file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/lib/${DYN_LIB_SUFFIX}) + file(INSTALL ${OPENCV_DYN_LIBS} DESTINATION ${DESTINATION_DIR}) + endif() + + # FlyCV + if(ENABLE_FLYCV) + file(GLOB_RECURSE ALL_FLYCV_DYN_LIBS ${FLYCV_LIB_DIR}/${DYN_LIB_SUFFIX}) + list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_FLYCV_DYN_LIBS}) + endif() + endif() + + if(ENABLE_OPENVINO_BACKEND) + # need plugins.xml for openvino backend + set(OPENVINO_RUNTIME_BIN_DIR ${OPENVINO_DIR}/bin) + file(GLOB OPENVINO_PLUGIN_XML ${OPENVINO_RUNTIME_BIN_DIR}/*.xml) + file(INSTALL ${OPENVINO_PLUGIN_XML} DESTINATION ${DESTINATION_DIR}) + endif() + + # Install other libraries + file(INSTALL ${ALL_NEED_DYN_LIBS} DESTINATION ${DESTINATION_DIR}) + file(INSTALL ${ALL_DEPS_DYN_LIBS} DESTINATION ${DESTINATION_DIR}) +endfunction() diff --git a/libs/ultrainfer/UltraInferCSharp.cmake.in 
b/libs/ultrainfer/UltraInferCSharp.cmake.in new file mode 100755 index 0000000000..f247a66fe4 --- /dev/null +++ b/libs/ultrainfer/UltraInferCSharp.cmake.in @@ -0,0 +1,13 @@ +list(APPEND ULTRAINFER_DOTNET_REFERENCES + "Microsoft.CSharp" + "System" + "System.Core" + "System.Data" + "System.Deployment" + "System.Drawing" + "System.Net.Http" + "System.Xml" + "System.Reflection" + "${CMAKE_CURRENT_LIST_DIR}/csharp_lib/ultrainfer_csharp.dll") + +set(ULTRAINFER_PACKAGE_REFERENCES "OpenCvSharp4_4.7.0.20230115;OpenCvSharp4.runtime.win_4.7.0.20230115") diff --git a/libs/ultrainfer/VERSION_NUMBER b/libs/ultrainfer/VERSION_NUMBER new file mode 100755 index 0000000000..77d6f4ca23 --- /dev/null +++ b/libs/ultrainfer/VERSION_NUMBER @@ -0,0 +1 @@ +0.0.0 diff --git a/libs/ultrainfer/cmake/UltraInferConfig.cmake b/libs/ultrainfer/cmake/UltraInferConfig.cmake new file mode 100755 index 0000000000..02b6e0de36 --- /dev/null +++ b/libs/ultrainfer/cmake/UltraInferConfig.cmake @@ -0,0 +1,10 @@ +# This file will define the following variables for find_package method: +# - UltraInfer_LIBS : The list of libraries to link against. +# - UltraInfer_INCLUDE_DIRS : The UltraInfer include directories. +# - UltraInfer_Found : The status of UltraInfer + +include(${CMAKE_CURRENT_LIST_DIR}/UltraInfer.cmake) +# setup UltraInfer cmake variables +set(UltraInfer_LIBS ${ULTRAINFER_LIBS}) +set(UltraInfer_INCLUDE_DIRS ${ULTRAINFER_INCS}) +set(UltraInfer_FOUND TRUE) diff --git a/libs/ultrainfer/cmake/ascend.cmake b/libs/ultrainfer/cmake/ascend.cmake new file mode 100755 index 0000000000..253563d853 --- /dev/null +++ b/libs/ultrainfer/cmake/ascend.cmake @@ -0,0 +1,32 @@ +if(NOT ${ENABLE_LITE_BACKEND}) + set(ENABLE_LITE_BACKEND ON) +endif() + +if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + if (NOT BUILD_ULTRAINFER_PYTHON) + message(STATUS "Build UltraInfer Ascend C++ library on X86 platform.") + if(NOT PADDLELITE_URL) + set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.CPP.inference_lite_lib.ubuntu.x86.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz") + endif() + else () + message(STATUS "Build UltraInfer Ascend Python library on X86 platform.") + if(NOT PADDLELITE_URL) + set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.Python.inference_lite_lib.ubuntu.x86.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz") + endif() + endif() +endif() + + +if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + if (NOT BUILD_ULTRAINFER_PYTHON) + message(STATUS "Build UltraInfer Ascend C++ library on aarch64 platform.") + if(NOT PADDLELITE_URL) + set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.CPP.inference_lite_lib.ubuntu.armv8.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz") + endif() + else () + message(STATUS "Build UltraInfer Ascend Python library on aarch64 platform.") + if(NOT PADDLELITE_URL) + set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.Python.inference_lite_lib.ubuntu.armv8.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz") + endif() + endif() +endif() diff --git a/libs/ultrainfer/cmake/build_paddle2onnx.cmake b/libs/ultrainfer/cmake/build_paddle2onnx.cmake new file mode 100755 index 0000000000..edacfafbc4 --- /dev/null +++ b/libs/ultrainfer/cmake/build_paddle2onnx.cmake @@ -0,0 +1,40 @@ +add_definitions(-DMAX_ONNX_OPSET_VERSION=16) +add_definitions(-DPADDLE2ONNX_LIB) + +# Third dependency: onnx +if(NOT TARGET onnx_proto) + if(NOT ONNX_NAMESPACE) + set(ONNX_NAMESPACE 
"paddle2onnx") + endif() + add_definitions("-DONNX_NAMESPACE=${ONNX_NAMESPACE}") + + set(MSVC_STATIC_CRT ON) + if(ONNX_CUSTOM_PROTOC_PATH) + if(WIN32) + if(MSVC_STATIC_CRT) + # MT + set(ONNX_USE_MSVC_STATIC_RUNTIME ON) + else() + # MD + set(ONNX_USE_MSVC_STATIC_RUNTIME OFF) + endif() + set(ONNX_CUSTOM_PROTOC_PATH "${ONNX_CUSTOM_PROTOC_PATH};$ENV{PATH}") + else() + set(ONNX_CUSTOM_PROTOC_PATH "${ONNX_CUSTOM_PROTOC_PATH}:$ENV{PATH}") + endif() + set(ENV{PATH} ${ONNX_CUSTOM_PROTOC_PATH}) + endif() + + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/onnx) +endif() + +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}/third_party/onnx) + +include_directories(${PROJECT_SOURCE_DIR}/third_party/optimizer) +add_subdirectory(${PROJECT_SOURCE_DIR}/paddle2onnx/proto) + +file(GLOB_RECURSE PADDLE2ONNX_ALL_SRCS ${PROJECT_SOURCE_DIR}/paddle2onnx/*.cc ${PROJECT_SOURCE_DIR}/third_party/optimizer/onnxoptimizer/*.cc) +list(REMOVE_ITEM PADDLE2ONNX_ALL_SRCS ${PROJECT_SOURCE_DIR}/paddle2onnx/cpp2py_export.cc ${PROJECT_SOURCE_DIR}/third_party/optimizer/onnxoptimizer/cpp2py_export.cc) diff --git a/libs/ultrainfer/cmake/build_tools.cmake b/libs/ultrainfer/cmake/build_tools.cmake new file mode 100755 index 0000000000..c091f4916e --- /dev/null +++ b/libs/ultrainfer/cmake/build_tools.cmake @@ -0,0 +1,87 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +function(download_patchelf) + if(UNIX AND (NOT APPLE)) + set(PATCHELF_EXE "patchelf") + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/patchelf-0.15.0-aarch64.tar.gz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/patchelf-0.15.0-aarch64.tar.gz ${THIRD_PARTY_PATH}/patchelf) + else() + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/patchelf-0.15.0-x86_64.tar.gz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/patchelf-0.15.0-x86_64.tar.gz ${THIRD_PARTY_PATH}/patchelf) + endif() + endif() +endfunction() + +function(download_protobuf) + if(WIN32) + if(NOT CMAKE_CL_64) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-win-x86-3.16.0.zip) + else() + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-win-x64-3.16.0.zip) + endif() + set(ORIGIN_ENV_PATH "$ENV{PATH}") + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/protobuf-win-3.16.0.tgz ${THIRD_PARTY_PATH}/protobuf) + set(ENV{PATH} "${THIRD_PARTY_PATH}\\protobuf\\bin;${ORIGIN_ENV_PATH}") + elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-osx-arm64-3.16.0.tgz) + else() + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-osx-x86_64-3.16.0.tgz) + endif() + set(ORIGIN_ENV_PATH "$ENV{PATH}") + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/protobuf-osx-3.16.0.tgz ${THIRD_PARTY_PATH}/protobuf) + set(ENV{PATH} "${THIRD_PARTY_PATH}/protobuf/bin/:${ORIGIN_ENV_PATH}") + else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-linux-aarch64-3.16.0.tgz) + else() + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_libs/protobuf-linux-x64-3.16.0.tgz) + endif() + set(ORIGIN_ENV_PATH "$ENV{PATH}") + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/protobuf-linux-3.16.0.tgz ${THIRD_PARTY_PATH}/protobuf) + set(ENV{PATH} "${THIRD_PARTY_PATH}/protobuf/bin/:${ORIGIN_ENV_PATH}") + endif() +endfunction() + +function(download_eigen) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_party/eigen-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/eigen-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/eigen) +endfunction() + +function(download_yaml_cpp) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_party/yaml-cpp-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/yaml-cpp-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/yaml-cpp) +endfunction() + +function(download_pybind) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_party/pybind11-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/pybind11-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/pybind11) +endfunction() + +function(download_dlpack) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_party/dlpack-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/dlpack-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/dlpack) +endfunction() + +function(download_onnx) + set(PATCHELF_URL https://bj.bcebos.com/fastdeploy/third_party/onnx-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/onnx-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/onnx) +endfunction() + +function(download_optimizer) + set(PATCHELF_URL 
https://bj.bcebos.com/fastdeploy/third_party/optimizer-linux-x86-241210.tgz) + download_and_decompress(${PATCHELF_URL} ${CMAKE_CURRENT_BINARY_DIR}/optimizer-linux-x86-241210.tgz ${THIRD_PARTY_DIR}/optimizer) +endfunction() diff --git a/libs/ultrainfer/cmake/check.cmake b/libs/ultrainfer/cmake/check.cmake new file mode 100755 index 0000000000..0bfc4546f7 --- /dev/null +++ b/libs/ultrainfer/cmake/check.cmake @@ -0,0 +1,45 @@ +# Check for 32bit system +if(WIN32) + if(NOT CMAKE_CL_64) + message("***********************Compile on non 64-bit system now**********************") + add_definitions(-DNON_64_PLATFORM) + if(WITH_GPU) + message(FATAL_ERROR "-DWITH_GPU=ON doesn't support on non 64-bit system now.") + endif() + if(ENABLE_PADDLE_BACKEND) + message(FATAL_ERROR "-DENABLE_PADDLE_BACKEND=ON doesn't support on non 64-bit system now.") + endif() + if(ENABLE_POROS_BACKEND) + message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON doesn't support on non 64-bit system now.") + endif() + endif() +endif() + +if(IOS) + if(ENABLE_ORT_BACKEND) + message(FATAL_ERROR "Not support ONNXRuntime backend for IOS now. Please set ENABLE_ORT_BACKEND=OFF.") + endif() + if(ENABLE_PADDLE_BACKEND) + message(FATAL_ERROR "Not support Paddle backend for IOS now. Please set ENABLE_PADDLE_BACKEND=OFF.") + endif() + if(ENABLE_OPENVINO_BACKEND) + message(FATAL_ERROR "Not support OpenVINO backend for IOS now. Please set ENABLE_OPENVINO_BACKEND=OFF.") + endif() + if(ENABLE_TRT_BACKEND) + message(FATAL_ERROR "Not support TensorRT backend for Andorid/IOS now. Please set ENABLE_TRT_BACKEND=OFF.") + endif() +endif() + +if(WITH_GPU) + if(APPLE) + message(FATAL_ERROR "Cannot enable GPU while compling in Mac OSX.") + elseif(IOS) + message(FATAL_ERROR "Cannot enable GPU while compling in IOS.") + endif() +endif() + +if(WITH_OPENCL) + if(NOT ENABLE_LITE_BACKEND) + message(FATAL_ERROR "Cannot enable OpenCL while compling unless in Paddle Lite backend is enbaled.") + endif() +endif() diff --git a/libs/ultrainfer/cmake/config_cpack.cmake b/libs/ultrainfer/cmake/config_cpack.cmake new file mode 100755 index 0000000000..f0f5e8c8b9 --- /dev/null +++ b/libs/ultrainfer/cmake/config_cpack.cmake @@ -0,0 +1,38 @@ +if(NOT UNIX) + return() +endif() + +set(PACKAGE_SYS_VERSION "linux") +if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PACKAGE_SYS_VERSION "${PACKAGE_SYS_VERSION}-aarch64") +else() + set(PACKAGE_SYS_VERSION "${PACKAGE_SYS_VERSION}-x64") +endif() +if(WITH_GPU) + set(PACKAGE_SYS_VERSION "${PACKAGE_SYS_VERSION}-gpu") +endif() + +# set(CPACK_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION ON) +set(CPACK_VERBATIM_VARIABLES TRUE) +set(CPACK_GENERATOR DEB RPM) +set(CPACK_THREADS 0) +set(CPACK_PACKAGE_CONTACT "ultrainfer@baidu.com") +set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") +set(CPACK_PACKAGE_VERSION "${ULTRAINFER_VERSION}") +set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PACKAGE_SYS_VERSION}-${ULTRAINFER_VERSION}") +set(CPACK_PACKAGE_NAME "${PROJECT_NAME}") + +set(CPACK_DEBIAN_PACKAGE_CONTROL_STRICT_PERMISSION TRUE) +configure_file(cpack/debian_postinst.in cpack/postinst @ONLY) +configure_file(cpack/debian_prerm.in cpack/prerm @ONLY) +set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA + "${CMAKE_CURRENT_BINARY_DIR}/cpack/postinst" + "${CMAKE_CURRENT_BINARY_DIR}/cpack/prerm") + +set(CPACK_RPM_PACKAGE_AUTOREQ FALSE) +configure_file(cpack/rpm_postinst.in cpack/rpm_postinst @ONLY) +configure_file(cpack/rpm_postrm.in cpack/rpm_postrm @ONLY) +set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/cpack/rpm_postinst") 
+set(CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/cpack/rpm_postrm") + +include(CPack) diff --git a/libs/ultrainfer/cmake/cuda.cmake b/libs/ultrainfer/cmake/cuda.cmake new file mode 100755 index 0000000000..70811c6122 --- /dev/null +++ b/libs/ultrainfer/cmake/cuda.cmake @@ -0,0 +1,283 @@ +if(NOT WITH_GPU) + return() +endif() + +# This is to eliminate the CMP0104 warnings from cmake 3.18+. +# Instead of setting CUDA_ARCHITECTURES, we will set CMAKE_CUDA_FLAGS. +set(CMAKE_CUDA_ARCHITECTURES OFF) + +if(BUILD_ON_JETSON) + set(fd_known_gpu_archs "53 62 72") + set(fd_known_gpu_archs10 "53 62 72") +else() + message("Using New Release Strategy - All Arches Packge") + set(fd_known_gpu_archs "35 50 52 60 61 70 75 80 86") + set(fd_known_gpu_archs10 "35 50 52 60 61 70 75") + set(fd_known_gpu_archs11 "50 60 61 70 75 80") +endif() + +###################################################################################### +# A function for automatic detection of GPUs installed (if autodetection is enabled) +# Usage: +# detect_installed_gpus(out_variable) +function(detect_installed_gpus out_variable) + if(NOT CUDA_gpu_detect_output) + set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) + + file( + WRITE ${cufile} + "" + "#include \"stdio.h\"\n" + "#include \"cuda.h\"\n" + "#include \"cuda_runtime.h\"\n" + "int main() {\n" + " int count = 0;\n" + " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" + " if (count == 0) return -1;\n" + " for (int device = 0; device < count; ++device) {\n" + " cudaDeviceProp prop;\n" + " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" + " printf(\"%d.%d \", prop.major, prop.minor);\n" + " }\n" + " return 0;\n" + "}\n") + + execute_process( + COMMAND "${CMAKE_CUDA_COMPILER}" "--run" "${cufile}" + WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" + RESULT_VARIABLE nvcc_res + OUTPUT_VARIABLE nvcc_out + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(nvcc_res EQUAL 0) + # only keep the last line of nvcc_out + string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}") + string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}") + list(GET nvcc_out -1 nvcc_out) + string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}") + set(CUDA_gpu_detect_output + ${nvcc_out} + CACHE INTERNAL + "Returned GPU architetures from detect_installed_gpus tool" + FORCE) + endif() + endif() + + if(NOT CUDA_gpu_detect_output) + message( + STATUS + "Automatic GPU detection failed. 
Building for all known architectures.") + set(${out_variable} + ${fd_known_gpu_archs} + PARENT_SCOPE) + else() + set(${out_variable} + ${CUDA_gpu_detect_output} + PARENT_SCOPE) + endif() +endfunction() + +######################################################################## +# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME +# Usage: +# select_nvcc_arch_flags(out_variable) +function(select_nvcc_arch_flags out_variable) + # List of arch names + set(archs_names + "Kepler" + "Maxwell" + "Pascal" + "Volta" + "Turing" + "Ampere" + "All" + "Manual") + set(archs_name_default "All") + list(APPEND archs_names "Auto") + + # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui) + set(CUDA_ARCH_NAME + ${archs_name_default} + CACHE STRING "Select target NVIDIA GPU achitecture.") + set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names}) + mark_as_advanced(CUDA_ARCH_NAME) + + # verify CUDA_ARCH_NAME value + if(NOT ";${archs_names};" MATCHES ";${CUDA_ARCH_NAME};") + string(REPLACE ";" ", " archs_names "${archs_names}") + message( + FATAL_ERROR "Only ${archs_names} architectures names are supported.") + endif() + + if(${CUDA_ARCH_NAME} STREQUAL "Manual") + set(CUDA_ARCH_BIN + ${fd_known_gpu_archs} + CACHE + STRING + "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported" + ) + set(CUDA_ARCH_PTX + "" + CACHE + STRING + "Specify 'virtual' PTX architectures to build PTX intermediate code for" + ) + mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX) + else() + unset(CUDA_ARCH_BIN CACHE) + unset(CUDA_ARCH_PTX CACHE) + endif() + + if(${CUDA_ARCH_NAME} STREQUAL "Kepler") + set(cuda_arch_bin "30 35") + elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") + if(BUILD_ON_JETSON) + set(cuda_arch_bin "53") + else() + set(cuda_arch_bin "50") + endif() + elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") + if(BUILD_ON_JETSON) + set(cuda_arch_bin "62") + else() + set(cuda_arch_bin "60 61") + endif() + elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") + if(BUILD_ON_JETSON) + set(cuda_arch_bin "72") + else() + set(cuda_arch_bin "70") + endif() + elseif(${CUDA_ARCH_NAME} STREQUAL "Turing") + set(cuda_arch_bin "75") + elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere") + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.1) # CUDA 11.0 + set(cuda_arch_bin "80") + elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.1+ + set(cuda_arch_bin "80 86") + endif() + elseif(${CUDA_ARCH_NAME} STREQUAL "All") + set(cuda_arch_bin ${fd_known_gpu_archs}) + elseif(${CUDA_ARCH_NAME} STREQUAL "Auto") + message( + STATUS + "WARNING: This is just a warning for publishing release. + You are building GPU version without supporting different architectures. + So the wheel package may fail on other GPU architectures. + You can add -DCUDA_ARCH_NAME=All in cmake command + to get a full wheel package to resolve this warning. + While, this version will still work on local GPU architecture.") + detect_installed_gpus(cuda_arch_bin) + else() # (${CUDA_ARCH_NAME} STREQUAL "Manual") + set(cuda_arch_bin ${CUDA_ARCH_BIN}) + endif() + + if(NEW_RELEASE_JIT) + set(cuda_arch_ptx "${cuda_arch_ptx}${cuda_arch_bin}") + set(cuda_arch_bin "") + endif() + + # remove dots and convert to lists + string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX REPLACE "\\." 
"" cuda_arch_ptx "${cuda_arch_ptx}") + string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") + + list(REMOVE_DUPLICATES cuda_arch_bin) + list(REMOVE_DUPLICATES cuda_arch_ptx) + + set(nvcc_flags "") + set(nvcc_archs_readable "") + + # Tell NVCC to add binaries for the specified GPUs + foreach(arch ${cuda_arch_bin}) + if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)") + # User explicitly specified PTX for the concrete BIN + string(APPEND nvcc_flags + " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}") + string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}") + else() + # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN + string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}") + string(APPEND nvcc_archs_readable " sm_${arch}") + endif() + endforeach() + + # Tell NVCC to add PTX intermediate code for the specified architectures + foreach(arch ${cuda_arch_ptx}) + string(APPEND nvcc_flags + " -gencode arch=compute_${arch},code=compute_${arch}") + string(APPEND nvcc_archs_readable " compute_${arch}") + endforeach() + + string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}") + set(${out_variable} + ${nvcc_flags} + PARENT_SCOPE) + set(${out_variable}_readable + ${nvcc_archs_readable} + PARENT_SCOPE) +endfunction() + +message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION}) +if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x + set(fd_known_gpu_archs ${fd_known_gpu_archs10}) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") +elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.2) # CUDA 11.0/11.1 + set(fd_known_gpu_archs ${fd_known_gpu_archs11}) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") +elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.2+ + set(fd_known_gpu_archs "${fd_known_gpu_archs11} 86") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") +endif() + +# setting nvcc arch flags +select_nvcc_arch_flags(NVCC_FLAGS_EXTRA) +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}") +message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}") + +# Set C++14 support +set(CUDA_PROPAGATE_HOST_FLAGS OFF) +# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc. +# So, don't set these flags here. 
+if(NOT DEFINED CMAKE_CUDA_STANDARD) + set(CMAKE_CUDA_STANDARD 11) +else() + message(WARNING "Detected custom CMAKE_CUDA_STANDARD is using: ${CMAKE_CUDA_STANDARD}") +endif() + +# (Note) For windows, if delete /W[1-4], /W1 will be added defaultly and conflic with -w +# So replace /W[1-4] with /W0 +if(WIN32) + string(REGEX REPLACE "/W[1-4]" " /W0 " CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}") +endif() +# in cuda9, suppress cuda warning on eigen +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -w") +# Set :expt-relaxed-constexpr to suppress Eigen warnings +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") +# Set :expt-extended-lambda to enable HOSTDEVICE annotation on lambdas +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda") + +if(WIN32) + set(CMAKE_CUDA_FLAGS + "${CMAKE_CUDA_FLAGS} -Xcompiler \"/wd4244 /wd4267 /wd4819 \"") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /bigobj") + if(MSVC_STATIC_CRT) + foreach(flag_var + CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE + CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "-MD") + string(REGEX REPLACE "-MD" "-MT" ${flag_var} "${${flag_var}}") + endif() + endforeach() + endif() +endif() + +mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD) +mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) diff --git a/libs/ultrainfer/cmake/cvcuda.cmake b/libs/ultrainfer/cmake/cvcuda.cmake new file mode 100755 index 0000000000..5d65ba538f --- /dev/null +++ b/libs/ultrainfer/cmake/cvcuda.cmake @@ -0,0 +1,41 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
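+
+# Illustrative configure sketch: CV-CUDA is only usable for Linux x86_64 GPU
+# builds (the checks below enforce this), so a typical configuration enabling it
+# might look roughly like:
+#
+#   cmake .. -DWITH_GPU=ON -DENABLE_VISION=ON -DENABLE_CVCUDA=ON
+#
+# ENABLE_VISION is included in this example because UltraInfer.cmake.in only
+# links the cvcuda/nvcv_types libraries when vision support is enabled.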
+ +if(NOT WITH_GPU) + message(FATAL_ERROR "ENABLE_CVCUDA is available on Linux and WITH_GPU=ON, but now WITH_GPU=OFF.") +endif() + +if(APPLE OR IOS OR WIN32) + message(FATAL_ERROR "Cannot enable CV-CUDA in mac/ios/windows os, please set -DENABLE_CVCUDA=OFF.") +endif() + +if(NOT (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")) + message(FATAL_ERROR "CV-CUDA only support x86_64.") +endif() + +set(CVCUDA_LIB_URL https://github.com/CVCUDA/CV-CUDA/releases/download/v0.2.1-alpha/nvcv-lib-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) +set(CVCUDA_LIB_FILENAME nvcv-lib-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) +set(CVCUDA_DEV_URL https://github.com/CVCUDA/CV-CUDA/releases/download/v0.2.1-alpha/nvcv-dev-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) +set(CVCUDA_DEV_FILENAME nvcv-dev-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) + +download_and_decompress(${CVCUDA_LIB_URL} ${CMAKE_CURRENT_BINARY_DIR}/${CVCUDA_LIB_FILENAME} ${THIRD_PARTY_PATH}/cvcuda) +download_and_decompress(${CVCUDA_DEV_URL} ${CMAKE_CURRENT_BINARY_DIR}/${CVCUDA_DEV_FILENAME} ${THIRD_PARTY_PATH}/cvcuda) + +execute_process(COMMAND rm -rf ${THIRD_PARTY_PATH}/install/cvcuda) +execute_process(COMMAND mkdir -p ${THIRD_PARTY_PATH}/install/cvcuda) +execute_process(COMMAND cp -r ${THIRD_PARTY_PATH}/cvcuda/opt/nvidia/cvcuda0/lib/x86_64-linux-gnu/ ${THIRD_PARTY_PATH}/install/cvcuda/lib) +execute_process(COMMAND cp -r ${THIRD_PARTY_PATH}/cvcuda/opt/nvidia/cvcuda0/include/ ${THIRD_PARTY_PATH}/install/cvcuda/include) + +link_directories(${THIRD_PARTY_PATH}/install/cvcuda/lib) +include_directories(${THIRD_PARTY_PATH}/install/cvcuda/include) diff --git a/libs/ultrainfer/cmake/faiss.cmake b/libs/ultrainfer/cmake/faiss.cmake new file mode 100755 index 0000000000..654a908e8e --- /dev/null +++ b/libs/ultrainfer/cmake/faiss.cmake @@ -0,0 +1,122 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +set(FAISS_PROJECT external_faiss_download) +set(FAISS_FILENAME faiss) +set(FAISS_PREFIX_DIR ${THIRD_LIBS_PATH}/${FAISS_FILENAME}) +set(FAISS_SOURCE_DIR ${THIRD_LIBS_PATH}/${FAISS_FILENAME}/src/${FAISS_PROJECT}) +set(FAISS_INSTALL_DIR ${THIRD_LIBS_PATH}/install/${FAISS_FILENAME}) +set(FAISS_INC_DIR ${FAISS_INSTALL_DIR}/include CACHE PATH "faiss include directory." FORCE) +set(FAISS_LIB_DIR ${FAISS_INSTALL_DIR}/lib CACHE PATH "faiss lib directory." 
FORCE) + +if(NOT WITH_FAISS_STATIC) + message(FATAL_ERROR "Not support WITH_FAISS_STATIC=OFF now!") +endif() + +set(FAISS_URL_PREFIX "https://bj.bcebos.com/fastdeploy/test") + +set(FAISS_VERSION 1.7.3) +# URL +if(NOT FAISS_URL) + if(WIN32) + set(FAISS_URL "${FAISS_URL_PREFIX}/faiss-win-x64-${FAISS_VERSION}.zip") + elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(FAISS_URL "${FAISS_URL_PREFIX}/faiss-osx-arm64-${FAISS_VERSION}.tgz") + else() + set(FAISS_URL "${FAISS_URL_PREFIX}/faiss-osx-x64-${FAISS_VERSION}.tgz") + endif() + else() # Linux + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + message(FATAL_ERROR "Not support for Linux aarch64 now!") + else() + if(WITH_FAISS_GPU) + set(FAISS_URL "${FAISS_URL_PREFIX}/faiss-linux-x64-gpu-${FAISS_VERSION}.tgz") + else() + set(FAISS_URL "${FAISS_URL_PREFIX}/faiss-linux-x64-${FAISS_VERSION}.tgz") + endif() + endif() + endif() +endif() + +# FAISS Headers +include_directories(${FAISS_INC_DIR}) + +# FAISS Libs paths +if(WIN32) + set(FAISS_LIB "${FAISS_LIB_DIR}/faiss.lib") +elseif(APPLE) + set(FAISS_LIB "${FAISS_LIB_DIR}/libfaiss.a") +else() # Linux + set(FAISS_LIB "${FAISS_LIB_DIR}/libfaiss.a") +endif() + +# Download FAISS +ExternalProject_Add( + ${FAISS_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${FAISS_URL} + PREFIX ${FAISS_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E remove_directory ${FAISS_INSTALL_DIR} && + ${CMAKE_COMMAND} -E make_directory ${FAISS_INSTALL_DIR} && + ${CMAKE_COMMAND} -E rename ${FAISS_SOURCE_DIR}/lib/ ${FAISS_INSTALL_DIR}/lib && + ${CMAKE_COMMAND} -E copy_directory ${FAISS_SOURCE_DIR}/include ${FAISS_INC_DIR} + BUILD_BYPRODUCTS ${FAISS_LIB}) + +set(FAISS_LIBRARIES) + +add_library(external_faiss STATIC IMPORTED GLOBAL) +set_property(TARGET external_faiss PROPERTY IMPORTED_LOCATION ${FAISS_LIB}) +add_dependencies(external_faiss ${FAISS_PROJECT}) + +list(APPEND FAISS_LIBRARIES external_faiss) + +# Add BLAS/LAPACK/OpenBLAS (needed by FAISS) +if(WIN32) + add_library(external_blas STATIC IMPORTED GLOBAL) + set_property(TARGET external_blas PROPERTY IMPORTED_LOCATION ${FAISS_LIB_DIR}/BLAS.lib) + add_dependencies(external_blas ${FAISS_PROJECT}) + list(APPEND FAISS_LIBRARIES external_blas) + + add_library(external_lapack STATIC IMPORTED GLOBAL) + set_property(TARGET external_lapack PROPERTY IMPORTED_LOCATION ${FAISS_LIB_DIR}/LAPACK.lib) + add_dependencies(external_lapack ${FAISS_PROJECT}) + list(APPEND FAISS_LIBRARIES external_lapack) +elseif(APPLE) + find_package(BLAS REQUIRED) + list(APPEND FAISS_LIBRARIES ${BLAS_LIBRARIES}) + + find_package(LAPACK REQUIRED) + list(APPEND FAISS_LIBRARIES ${LAPACK_LIBRARIES}) +else() # Linux + find_package(BLAS REQUIRED) + list(APPEND FAISS_LIBRARIES ${BLAS_LIBRARIES}) + + find_package(LAPACK REQUIRED) + list(APPEND FAISS_LIBRARIES ${LAPACK_LIBRARIES}) +endif() + +# Add OpenMP (REQUIRED), OpenMP must be avaliable. +find_package(OpenMP REQUIRED) +list(APPEND FAISS_LIBRARIES OpenMP::OpenMP_CXX) + +set(FAISS_INCLUDE_DIRS ${FAISS_INC_DIR}) +set(FAISS_LIBS ${FAISS_LIBRARIES}) +set(FAISS_FOUND TRUE) diff --git a/libs/ultrainfer/cmake/fast_tokenizer.cmake b/libs/ultrainfer/cmake/fast_tokenizer.cmake new file mode 100755 index 0000000000..4803a1db02 --- /dev/null +++ b/libs/ultrainfer/cmake/fast_tokenizer.cmake @@ -0,0 +1,106 @@ + + +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +set(FASTTOKENIZER_PROJECT "extern_fast_tokenizer") +set(FASTTOKENIZER_PREFIX_DIR ${THIRD_PARTY_PATH}/fast_tokenizer) +set(FASTTOKENIZER_SOURCE_DIR + ${THIRD_PARTY_PATH}/fast_tokenizer/src/${FASTTOKENIZER_PROJECT}) +set(FASTTOKENIZER_INSTALL_DIR ${THIRD_PARTY_PATH}/install/fast_tokenizer) +set(FASTTOKENIZER_INC_DIR + "${FASTTOKENIZER_INSTALL_DIR}/include" + "${FASTTOKENIZER_INSTALL_DIR}/third_party/include" + CACHE PATH "fast_tokenizer include directory." FORCE) +set(FASTTOKENIZER_LIB_DIR + "${FASTTOKENIZER_INSTALL_DIR}/lib/" + CACHE PATH "fast_tokenizer lib directory." FORCE) + +set(FASTTOKENIZER_THIRD_LIB_DIR + "${FASTTOKENIZER_INSTALL_DIR}/third_party/lib/" + CACHE PATH "fast_tokenizer lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" + "${FASTTOKENIZER_LIB_DIR}") + +include_directories(${FASTTOKENIZER_INC_DIR}) + +# Set lib path +if(WIN32) + set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/core_tokenizers.lib" + CACHE FILEPATH "fast_tokenizer compile library." FORCE) + set(ICUDT_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icudt.lib") + set(ICUUC_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icuuc.lib") +elseif(APPLE) + set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.dylib" + CACHE FILEPATH "fast_tokenizer compile library." FORCE) +else() + set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.so" + CACHE FILEPATH "fast_tokenizer compile library." 
FORCE) +endif(WIN32) +message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}") + +set(FASTTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/fast_tokenizer/") +set(FASTTOKENIZER_VERSION "1.0.2") + +# Set download url +if(WIN32) + set(FASTTOKENIZER_FILE "fast_tokenizer-win-x64-${FASTTOKENIZER_VERSION}.zip") + if(NOT CMAKE_CL_64) + set(FASTTOKENIZER_FILE "fast_tokenizer-win-x86-${FASTTOKENIZER_VERSION}.zip") + endif() +elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(FASTTOKENIZER_FILE "fast_tokenizer-osx-arm64-${FASTTOKENIZER_VERSION}.tgz") + else() + set(FASTTOKENIZER_FILE "fast_tokenizer-osx-x86_64-${FASTTOKENIZER_VERSION}.tgz") + endif() +else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(FASTTOKENIZER_FILE "fast_tokenizer-linux-aarch64-${FASTTOKENIZER_VERSION}.tgz") + else() + set(FASTTOKENIZER_FILE "fast_tokenizer-linux-x64-${FASTTOKENIZER_VERSION}.tgz") + endif() +endif() +set(FASTTOKENIZER_URL "${FASTTOKENIZER_URL_BASE}${FASTTOKENIZER_FILE}") + +ExternalProject_Add( + ${FASTTOKENIZER_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${FASTTOKENIZER_URL} + PREFIX ${FASTTOKENIZER_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy_directory ${FASTTOKENIZER_SOURCE_DIR} ${FASTTOKENIZER_INSTALL_DIR} + BUILD_BYPRODUCTS ${FASTTOKENIZER_COMPILE_LIB}) + +add_library(fast_tokenizer STATIC IMPORTED GLOBAL) +set_property(TARGET fast_tokenizer PROPERTY IMPORTED_LOCATION ${FASTTOKENIZER_COMPILE_LIB}) +add_dependencies(fast_tokenizer ${FASTTOKENIZER_PROJECT}) +list(APPEND DEPEND_LIBS fast_tokenizer) + +if (WIN32) + add_library(icudt STATIC IMPORTED GLOBAL) + set_property(TARGET icudt PROPERTY IMPORTED_LOCATION ${ICUDT_LIB}) + add_dependencies(icudt ${FASTTOKENIZER_PROJECT}) + list(APPEND DEPEND_LIBS icudt) + + add_library(icuuc STATIC IMPORTED GLOBAL) + set_property(TARGET icuuc PROPERTY IMPORTED_LOCATION ${ICUUC_LIB}) + add_dependencies(icuuc ${FASTTOKENIZER_PROJECT}) + list(APPEND DEPEND_LIBS icuuc) +endif() diff --git a/libs/ultrainfer/cmake/flycv.cmake b/libs/ultrainfer/cmake/flycv.cmake new file mode 100755 index 0000000000..9bce185368 --- /dev/null +++ b/libs/ultrainfer/cmake/flycv.cmake @@ -0,0 +1,97 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +set(FLYCV_PROJECT "extern_flycv") +set(FLYCV_PREFIX_DIR ${THIRD_PARTY_PATH}/flycv) +set(FLYCV_SOURCE_DIR + ${THIRD_PARTY_PATH}/flycv/src/${FLYCV_PROJECT}) +set(FLYCV_INSTALL_DIR ${THIRD_PARTY_PATH}/install/flycv) +set(FLYCV_INC_DIR + "${FLYCV_INSTALL_DIR}/include" + CACHE PATH "flycv include directory." FORCE) +set(FLYCV_LIB_DIR + "${FLYCV_INSTALL_DIR}/lib/" + CACHE PATH "flycv lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" + "${FLYCV_LIB_DIR}") + +include_directories(${FLYCV_INC_DIR}) + +if(WIN32) + set(FLYCV_COMPILE_LIB + "${FLYCV_INSTALL_DIR}/lib/flycv.lib" + CACHE FILEPATH "flycv compile library." 
FORCE) +elseif(APPLE) + set(FLYCV_COMPILE_LIB + "${FLYCV_INSTALL_DIR}/lib/libflycv.dylib" + CACHE FILEPATH "flycv compile library." FORCE) +else() + set(FLYCV_COMPILE_LIB + "${FLYCV_INSTALL_DIR}/lib/libflycv_shared.so" + CACHE FILEPATH "flycv compile library." FORCE) +endif() + +set(FLYCV_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") +set(FLYCV_VERSION "1.0.0") + +if(WIN32) + message(FATAL_ERROR "FlyCV is not supported on Windows now.") + set(FLYCV_FILE "flycv-win-x64-${FLYCV_VERSION}.zip") +elseif(APPLE) + message(FATAL_ERROR "FlyCV is not supported on Mac OSX now.") + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(FLYCV_FILE "flycv-osx-arm64-${FLYCV_VERSION}.tgz") + else() + set(FLYCV_FILE "flycv-osx-x86_64-${FLYCV_VERSION}.tgz") + endif() +else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(FLYCV_FILE "flycv-linux-aarch64-${FLYCV_VERSION}.tgz") + else() + if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + set(FLYCV_FILE "flycv-linux-aarch64-${FLYCV_VERSION}.tgz") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") + set(FLYCV_FILE "flycv-linux-armhf-${FLYCV_VERSION}.tgz") + else() + # set(FLYCV_FILE "flycv-linux-x64-${FLYCV_VERSION}.tgz") + set(FLYCV_FILE "flycv-linux-x64-1.1.0-dev.tgz") + endif() + endif() +endif() +set(FLYCV_URL "${FLYCV_URL_BASE}${FLYCV_FILE}") + +ExternalProject_Add( + ${FLYCV_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${FLYCV_URL} + PREFIX ${FLYCV_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E remove_directory ${FLYCV_INSTALL_DIR} && + ${CMAKE_COMMAND} -E make_directory ${FLYCV_INSTALL_DIR} && + ${CMAKE_COMMAND} -E rename ${FLYCV_SOURCE_DIR}/lib/ + ${FLYCV_LIB_DIR} && ${CMAKE_COMMAND} -E copy_directory + ${FLYCV_SOURCE_DIR}/include ${FLYCV_INC_DIR} + BUILD_BYPRODUCTS ${FLYCV_COMPILE_LIB}) + +add_library(external_flycv STATIC IMPORTED GLOBAL) +set_property(TARGET external_flycv PROPERTY IMPORTED_LOCATION + ${FLYCV_COMPILE_LIB}) +add_dependencies(external_flycv ${FLYCV_PROJECT}) + +set(FLYCV_LIBRARIES external_flycv) diff --git a/libs/ultrainfer/cmake/gflags.cmake b/libs/ultrainfer/cmake/gflags.cmake new file mode 100755 index 0000000000..dc2cac4dce --- /dev/null +++ b/libs/ultrainfer/cmake/gflags.cmake @@ -0,0 +1,89 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +INCLUDE(ExternalProject) + +if(THIRD_PARTY_PATH) + SET(GFLAGS_PREFIX_DIR ${THIRD_PARTY_PATH}/gflags) + SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags) +else() + # For example cmake + SET(GFLAGS_PREFIX_DIR ${ULTRAINFER_INSTALL_DIR}/installed_ultrainfer/cmake) + SET(GFLAGS_INSTALL_DIR ${ULTRAINFER_INSTALL_DIR}/installed_ultrainfer/cmake/gflags) +endif() +SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE) +set(GFLAGS_SOURCE_FILE ${GFLAGS_PREFIX_DIR}/src/gflags.tgz CACHE PATH "gflags source file." 
FORCE) + +set(GFLAGS_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs") +set(GFLAGS_URL ${GFLAGS_URL_PREFIX}/gflags.tgz) +set(GFLAGS_CACHE_FILE ${CMAKE_CURRENT_LIST_DIR}/gflags.tgz) +if(EXISTS ${GFLAGS_CACHE_FILE}) + set(GFLAGS_URL ${GFLAGS_CACHE_FILE} CACHE PATH "gflags cache file." FORCE) + set(GFLAGS_SOURCE_FILE ${GFLAGS_CACHE_FILE} CACHE PATH "gflags source file." FORCE) +endif() + +IF(WIN32) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) +ELSE(WIN32) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) + set(BUILD_COMMAND $(MAKE) --silent) + set(INSTALL_COMMAND $(MAKE) install) +ENDIF(WIN32) + +INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR}) + +ExternalProject_Add( + extern_gflags + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${GFLAGS_URL} + PREFIX ${GFLAGS_PREFIX_DIR} + UPDATE_COMMAND "" + BUILD_COMMAND ${BUILD_COMMAND} + INSTALL_COMMAND ${INSTALL_COMMAND} + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DBUILD_STATIC_LIBS=ON + -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES} +) +ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) +ADD_DEPENDENCIES(gflags extern_gflags) + +if(UNIX AND (NOT APPLE)) + list(APPEND GFLAGS_LIBRARIES pthread) +endif() + +# On Windows (including MinGW), the Shlwapi library is used by gflags if available. +if (WIN32) + include(CheckIncludeFileCXX) + check_include_file_cxx("shlwapi.h" HAVE_SHLWAPI) + if (HAVE_SHLWAPI) + set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib) + list(APPEND GFLAGS_LIBRARIES shlwapi.lib) + endif(HAVE_SHLWAPI) +endif (WIN32) diff --git a/libs/ultrainfer/cmake/glog.cmake b/libs/ultrainfer/cmake/glog.cmake new file mode 100755 index 0000000000..2a55b7a37b --- /dev/null +++ b/libs/ultrainfer/cmake/glog.cmake @@ -0,0 +1,68 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +INCLUDE(ExternalProject) + +SET(GLOG_PREFIX_DIR ${THIRD_PARTY_PATH}/glog) +SET(GLOG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/glog) +SET(GLOG_INCLUDE_DIR "${GLOG_INSTALL_DIR}/include" CACHE PATH "glog include directory." 
FORCE) +SET(GLOG_REPOSITORY ${GIT_URL}/google/glog.git) +SET(GLOG_TAG v0.4.0) + +IF(WIN32) + SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/glog.lib" CACHE FILEPATH "glog library." FORCE) + SET(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4530") + add_definitions("/DGOOGLE_GLOG_DLL_DECL=") +ELSE(WIN32) + SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.a" CACHE FILEPATH "glog library." FORCE) + SET(GLOG_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) +ENDIF(WIN32) + +INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) + +ExternalProject_Add( + extern_glog + ${EXTERNAL_PROJECT_LOG_ARGS} + ${SHALLOW_CLONE} + GIT_REPOSITORY ${GLOG_REPOSITORY} + GIT_TAG ${GLOG_TAG} + DEPENDS gflags + PREFIX ${GLOG_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DWITH_GFLAGS=OFF + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GLOG_LIBRARIES} +) + +ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES}) +ADD_DEPENDENCIES(glog extern_glog gflags) +LINK_LIBRARIES(glog) diff --git a/libs/ultrainfer/cmake/gtest.cmake b/libs/ultrainfer/cmake/gtest.cmake new file mode 100755 index 0000000000..4294850716 --- /dev/null +++ b/libs/ultrainfer/cmake/gtest.cmake @@ -0,0 +1,84 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(WITH_TESTING) + +INCLUDE(GNUInstallDirs) +INCLUDE(ExternalProject) + +SET(GTEST_PREFIX_DIR ${THIRD_PARTY_PATH}/gtest) +SET(GTEST_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gtest) +SET(GTEST_INCLUDE_DIR "${GTEST_INSTALL_DIR}/include" CACHE PATH "gtest include directory." FORCE) +set(GTEST_REPOSITORY ${GIT_URL}/google/googletest.git) +set(GTEST_TAG release-1.8.1) + +INCLUDE_DIRECTORIES(${GTEST_INCLUDE_DIR}) + +IF(WIN32) + set(GTEST_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/gtest.lib" CACHE FILEPATH "gtest libraries." FORCE) + set(GTEST_MAIN_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/gtest_main.lib" CACHE FILEPATH "gtest main libraries." 
FORCE) + string(REPLACE "/w " "" GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REPLACE "/w " "" GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "/W0 " "" GTEST_CMAKE_C_FLAGS "${GTEST_CMAKE_C_FLAGS}") + string(REPLACE "/W0 " "" GTEST_CMAKE_CXX_FLAGS "${GTEST_CMAKE_CXX_FLAGS}") +ELSE(WIN32) + set(GTEST_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest.a" CACHE FILEPATH "gtest libraries." FORCE) + set(GTEST_MAIN_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE) + set(GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + set(GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +ENDIF(WIN32) + +ExternalProject_Add( + extern_gtest + ${EXTERNAL_PROJECT_LOG_ARGS} + ${SHALLOW_CLONE} + GIT_REPOSITORY ${GTEST_REPOSITORY} + GIT_TAG ${GTEST_TAG} + PREFIX ${GTEST_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${GTEST_CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_GMOCK=ON + -Dgtest_disable_pthreads=ON + -Dgtest_force_shared_crt=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GTEST_LIBRARIES} + BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES} +) + +ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES}) +ADD_DEPENDENCIES(gtest extern_gtest) + +ADD_LIBRARY(gtest_main STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES}) +ADD_DEPENDENCIES(gtest_main extern_gtest) + +ENDIF() diff --git a/libs/ultrainfer/cmake/horizon.cmake b/libs/ultrainfer/cmake/horizon.cmake new file mode 100755 index 0000000000..51070a75c6 --- /dev/null +++ b/libs/ultrainfer/cmake/horizon.cmake @@ -0,0 +1,24 @@ +# get Horizon_URL +set(HORIZON_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") + +set(HORIZON_VERSION "2.5.2") +set(HORIZON_FILE "horizon_runtime-xj3-aarch64-${HORIZON_VERSION}.tgz") +set(HORIZON_URL "${HORIZON_URL_BASE}${HORIZON_FILE}") + +# download_and_decompress +download_and_decompress(${HORIZON_URL} ${CMAKE_CURRENT_BINARY_DIR}/${HORIZON_FILE} ${THIRD_PARTY_PATH}/install) +# set path +set(HORIZON_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/) + +set(DNN_PATH ${HORIZON_RUNTIME_PATH}/dnn/) +set(APPSDK_PATH ${HORIZON_RUNTIME_PATH}/appsdk/appuser/) + +set(DNN_LIB_PATH ${DNN_PATH}/lib) +set(APPSDK_LIB_PATH ${APPSDK_PATH}/lib/hbbpu) +set(BPU_libs dnn cnn_intf hbrt_bernoulli_aarch64) + +include_directories(${DNN_PATH}/include + ${APPSDK_PATH}/include) +link_directories(${DNN_LIB_PATH} + ${APPSDK_PATH}/lib/hbbpu + ${APPSDK_PATH}/lib) diff --git a/libs/ultrainfer/cmake/kunlunxin.cmake b/libs/ultrainfer/cmake/kunlunxin.cmake new file mode 100755 index 0000000000..3194c76212 --- /dev/null +++ b/libs/ultrainfer/cmake/kunlunxin.cmake @@ -0,0 +1,26 @@ +if(NOT ENABLE_PADDLE_BACKEND) + if(NOT ENABLE_LITE_BACKEND) + message(WARNING "Will force to set ENABLE_LITE_BACKEND=ON if ENABLE_PADDLE_BACKEND=OFF when build with KunlunXin.") + 
set(ENABLE_LITE_BACKEND ON) + endif() +else() + if(ENABLE_LITE_BACKEND) + message(WARNING "Will force to set ENABLE_LITE_BACKEND=OFF if ENABLE_PADDLE_BACKEND=ON when build with KunlunXin.") + set(ENABLE_LITE_BACKEND OFF) + endif() +endif() + +option(WITH_LITE_XPU_LOG "" ON) +if(NOT ENABLE_PADDLE_BACKEND) + if(NOT PADDLELITE_URL) + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-aarch64-xpu-v213.tgz") + else () + if (WITH_LITE_XPU_LOG) + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20230410.tgz") + else() + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-without-log-20230303.tgz") + endif() + endif() + endif() +endif() diff --git a/libs/ultrainfer/cmake/onnxruntime.cmake b/libs/ultrainfer/cmake/onnxruntime.cmake new file mode 100755 index 0000000000..8672c8c7ef --- /dev/null +++ b/libs/ultrainfer/cmake/onnxruntime.cmake @@ -0,0 +1,129 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(ExternalProject) + +set(ONNXRUNTIME_PROJECT "extern_onnxruntime") +set(ONNXRUNTIME_PREFIX_DIR ${THIRD_PARTY_PATH}/onnxruntime) +set(ONNXRUNTIME_SOURCE_DIR + ${THIRD_PARTY_PATH}/onnxruntime/src/${ONNXRUNTIME_PROJECT}) +set(ONNXRUNTIME_INSTALL_DIR ${THIRD_PARTY_PATH}/install/onnxruntime) + +if (ORT_DIRECTORY) + message(STATUS "Use the onnxruntime lib specified by user. The ONNXRuntime path: ${ORT_DIRECTORY}") + STRING(REGEX REPLACE "\\\\" "/" ORT_DIRECTORY ${ORT_DIRECTORY}) + set(ONNXRUNTIME_INC_DIR + "${ORT_DIRECTORY}/include" + CACHE PATH "onnxruntime include directory." FORCE) + + set(ONNXRUNTIME_LIB_DIR + "${ORT_DIRECTORY}/lib" + CACHE PATH "onnxruntime lib directory." FORCE) +else() + message(STATUS "Use the default onnxruntime lib. The ONNXRuntime path: ${ONNXRUNTIME_INSTALL_DIR}") + set(ONNXRUNTIME_INC_DIR + "${ONNXRUNTIME_INSTALL_DIR}/include" + CACHE PATH "onnxruntime include directory." FORCE) + set(ONNXRUNTIME_LIB_DIR + "${ONNXRUNTIME_INSTALL_DIR}/lib" + CACHE PATH "onnxruntime lib directory." 
FORCE) +endif() +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}") + +set(ONNXRUNTIME_VERSION "1.12.0") +set(ONNXRUNTIME_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs/") + +if(WIN32) + if(WITH_GPU) + set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-gpu-${ONNXRUNTIME_VERSION}.zip") + elseif(WITH_DIRECTML) + set(ONNXRUNTIME_FILENAME "onnxruntime-directml-win-x64.zip") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-${ONNXRUNTIME_VERSION}.zip") + endif() + if(NOT CMAKE_CL_64) + if(WITH_DIRECTML) + set(ONNXRUNTIME_FILENAME "onnxruntime-directml-win-x86.zip") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-win-x86-${ONNXRUNTIME_VERSION}.zip") + endif() + endif() +elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(ONNXRUNTIME_FILENAME "onnxruntime-osx-arm64-${ONNXRUNTIME_VERSION}.tgz") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-osx-x86_64-${ONNXRUNTIME_VERSION}.tgz") + endif() +else() + if(WITH_GPU) + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + message("Cannot compile with onnxruntime-gpu while in linux-aarch64 platform, fallback to onnxruntime-cpu") + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-aarch64-${ONNXRUNTIME_VERSION}.tgz") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-x64-gpu-${ONNXRUNTIME_VERSION}.tgz") + endif() + else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-aarch64-${ONNXRUNTIME_VERSION}.tgz") + else() + # cross-compling while the host is x64 but the target is aarch64. + if ((CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "arm")) + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-aarch64-${ONNXRUNTIME_VERSION}.tgz") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-x64-${ONNXRUNTIME_VERSION}.tgz") + endif() + endif() + endif() +endif() +set(ONNXRUNTIME_URL "${ONNXRUNTIME_URL_PREFIX}${ONNXRUNTIME_FILENAME}") + +include_directories(${ONNXRUNTIME_INC_DIR} +)# For ONNXRUNTIME code to include internal headers. + +if(WIN32) + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_LIB_DIR}/onnxruntime.lib" + CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) +elseif(APPLE) + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_LIB_DIR}/libonnxruntime.dylib" + CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) +else() + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_LIB_DIR}/libonnxruntime.so" + CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) +endif() + +if (NOT ORT_DIRECTORY) + ExternalProject_Add( + ${ONNXRUNTIME_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${ONNXRUNTIME_URL} + PREFIX ${ONNXRUNTIME_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E remove_directory ${ONNXRUNTIME_INSTALL_DIR} && + ${CMAKE_COMMAND} -E make_directory ${ONNXRUNTIME_INSTALL_DIR} && + ${CMAKE_COMMAND} -E rename ${ONNXRUNTIME_SOURCE_DIR}/lib/ ${ONNXRUNTIME_INSTALL_DIR}/lib && + ${CMAKE_COMMAND} -E copy_directory ${ONNXRUNTIME_SOURCE_DIR}/include + ${ONNXRUNTIME_INC_DIR} + BUILD_BYPRODUCTS ${ONNXRUNTIME_LIB}) +endif() + +add_library(external_onnxruntime STATIC IMPORTED GLOBAL) +set_property(TARGET external_onnxruntime PROPERTY IMPORTED_LOCATION ${ONNXRUNTIME_LIB}) +add_dependencies(external_onnxruntime ${ONNXRUNTIME_PROJECT}) diff --git a/libs/ultrainfer/cmake/opencv.cmake b/libs/ultrainfer/cmake/opencv.cmake new file mode 100755 index 0000000000..948931a69c --- /dev/null +++ b/libs/ultrainfer/cmake/opencv.cmake @@ -0,0 +1,90 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set(COMPRESSED_SUFFIX ".tgz") + +if(WIN32) + if(NOT CMAKE_CL_64) + set(OPENCV_FILENAME "opencv-win-x86-3.4.16") + else() + set(OPENCV_FILENAME "opencv-win-x64-3.4.16") + endif() + set(COMPRESSED_SUFFIX ".zip") +elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(OPENCV_FILENAME "opencv-osx-arm64-3.4.16") + else() + set(OPENCV_FILENAME "opencv-osx-x86_64-3.4.16") + endif() +elseif(IOS) + message(FATAL_ERROR "Not support cross compiling for IOS now!") +# Linux +else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(OPENCV_FILENAME "opencv-linux-aarch64-3.4.14") + endif() +endif() + +if(NOT OPENCV_FILENAME) + set(OPENCV_FILENAME "opencv-linux-x64-3.4.16") +endif() + +set(OPENCV_INSTALL_DIR ${THIRD_PARTY_PATH}/install/) +if(WIN32) + if(NOT CMAKE_CL_64) + set(OPENCV_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs") + else() + set(OPENCV_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs") + endif() +else() # TODO: use ultrainfer/third_libs instead. + set(OPENCV_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs") +endif() +if(NOT OPENCV_URL) + set(OPENCV_URL ${OPENCV_URL_PREFIX}/${OPENCV_FILENAME}${COMPRESSED_SUFFIX}) +endif() + + +if(BUILD_ON_JETSON) + if(EXISTS /usr/lib/aarch64-linux-gnu/cmake/opencv4/) + set(OPENCV_DIRECTORY /usr/lib/aarch64-linux-gnu/cmake/opencv4/) + endif() +endif() + +if(OPENCV_DIRECTORY) + message(STATUS "Use the opencv lib specified by user. The OpenCV path: ${OPENCV_DIRECTORY}") + STRING(REGEX REPLACE "\\\\" "/" OPENCV_DIRECTORY ${OPENCV_DIRECTORY}) + # Win/Linux/Mac + set(OpenCV_DIR ${OPENCV_DIRECTORY}) + find_package(OpenCV REQUIRED PATHS ${OpenCV_DIR}) + include_directories(${OpenCV_INCLUDE_DIRS}) + list(APPEND DEPEND_LIBS ${OpenCV_LIBS}) +else() + message(STATUS "Use the default OpenCV lib from: ${OPENCV_URL}") + # Win/Linux/Mac + download_and_decompress(${OPENCV_URL} ${CMAKE_CURRENT_BINARY_DIR}/${OPENCV_FILENAME}${COMPRESSED_SUFFIX} ${THIRD_PARTY_PATH}/install/) + if(EXISTS ${THIRD_PARTY_PATH}/install/opencv) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/opencv) + endif() + file(RENAME ${THIRD_PARTY_PATH}/install/${OPENCV_FILENAME}/ ${THIRD_PARTY_PATH}/install/opencv) + set(OPENCV_FILENAME opencv) + if(NOT OpenCV_DIR) + set(OpenCV_DIR ${THIRD_PARTY_PATH}/install/${OPENCV_FILENAME}) + endif() + if (WIN32) + set(OpenCV_DIR ${OpenCV_DIR}/build) + endif() + find_package(OpenCV REQUIRED PATHS ${OpenCV_DIR} NO_DEFAULT_PATH) + include_directories(${OpenCV_INCLUDE_DIRS}) + list(APPEND DEPEND_LIBS opencv_core opencv_video opencv_highgui opencv_imgproc opencv_imgcodecs opencv_calib3d opencv_features2d opencv_flann) +endif() diff --git a/libs/ultrainfer/cmake/openvino.cmake b/libs/ultrainfer/cmake/openvino.cmake new file mode 100755 index 0000000000..c97af80bf1 --- /dev/null +++ b/libs/ultrainfer/cmake/openvino.cmake @@ -0,0 +1,112 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(ExternalProject) + +if (OPENVINO_DIRECTORY) + message(STATUS "Use the openvino lib specified by user. The OpenVINO path: ${OPENVINO_DIRECTORY}") + STRING(REGEX REPLACE "\\\\" "/" OPENVINO_DIRECTORY ${OPENVINO_DIRECTORY}) + get_openvino_libs(${OPENVINO_DIRECTORY}/runtime) + include_directories(${OPENVINO_DIRECTORY}/runtime/include ${OPENVINO_DIRECTORY}/runtime/include/ie) + set(OPENVINO_LIB_DIR + "${OPENVINO_DIRECTORY}/runtime/lib/intel64" + CACHE PATH "openvino lib directory." FORCE) + set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${OPENVINO_LIB_DIR}") + +else() + set(OPENVINO_PROJECT "extern_openvino") + + set(OPENVINO_VERSION "2022.2.0.dev20220829") + set(OPENVINO_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs/") + + set(COMPRESSED_SUFFIX ".tgz") + if(WIN32) + set(OPENVINO_FILENAME "w_openvino_toolkit_windows_${OPENVINO_VERSION}") + set(COMPRESSED_SUFFIX ".zip") + if(NOT CMAKE_CL_64) + message(FATAL_ERROR "UltraInfer cannot ENABLE_OPENVINO_BACKEND in win32 now.") + endif() + elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + message("Cannot compile with openvino while in osx arm64 platform right now") + else() + set(OPENVINO_FILENAME "m_openvino_toolkit_osx_${OPENVINO_VERSION}") + endif() + else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + message("Cannot compile with openvino while in linux-aarch64 platform") + else() + set(OPENVINO_VERSION "dev.2023.03.2") + if(NEED_ABI0) + set(OPENVINO_FILENAME "openvino-linux-x64-20230302-abi0") + else() + set(OPENVINO_FILENAME "openvino-linux-x64-20230302") + endif() + endif() + endif() + set(OPENVINO_URL "${OPENVINO_URL_PREFIX}${OPENVINO_FILENAME}${COMPRESSED_SUFFIX}") + + download_and_decompress(${OPENVINO_URL} + ${CMAKE_CURRENT_BINARY_DIR}/${OPENVINO_FILENAME}${COMPRESSED_SUFFIX} + ${THIRD_PARTY_PATH}/install) + + if(EXISTS ${THIRD_PARTY_PATH}/install/openvino) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/openvino) + endif() + + file(RENAME ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME} ${THIRD_PARTY_PATH}/install/openvino) + set(OPENVINO_FILENAME openvino) + + set(OPENVINO_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/runtime) + set(OPENVINO_INSTALL_INC_DIR + "${OPENVINO_INSTALL_DIR}/include" + "${OPENVINO_INSTALL_DIR}/include/ie" + CACHE PATH "openvino install include directory." FORCE) + + set(OPENVINO_LIB_DIR + "${OPENVINO_INSTALL_DIR}/lib/" + "${OPENVINO_INSTALL_DIR}/3rdparty/tbb/lib/" + CACHE PATH "openvino lib directory." FORCE) + set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${OPENVINO_LIB_DIR}") + + # For OPENVINO code to include internal headers. 
+ include_directories(${OPENVINO_INSTALL_INC_DIR}) + + if(WIN32) + file(GLOB_RECURSE OPENVINO_LIB_FILES ${OPENVINO_INSTALL_DIR}/lib/intel64/Release/*) + file(COPY ${OPENVINO_LIB_FILES} DESTINATION ${OPENVINO_INSTALL_DIR}/lib/) + file(REMOVE_RECURSE ${OPENVINO_INSTALL_DIR}/lib/intel64) + + file(GLOB_RECURSE OPENVINO_BIN_FILES ${OPENVINO_INSTALL_DIR}/bin/intel64/Release/*) + file(COPY ${OPENVINO_BIN_FILES} DESTINATION ${OPENVINO_INSTALL_DIR}/bin/) + file(REMOVE_RECURSE ${OPENVINO_INSTALL_DIR}/bin/intel64) + elseif(APPLE) + file(GLOB_RECURSE OPENVINO_LIB_FILES ${OPENVINO_INSTALL_DIR}/lib/intel64/Release/*) + file(COPY ${OPENVINO_LIB_FILES} DESTINATION ${OPENVINO_INSTALL_DIR}/lib/) + file(REMOVE_RECURSE ${OPENVINO_INSTALL_DIR}/lib/intel64) + else() + file(GLOB_RECURSE OPENVINO_LIB_FILES ${OPENVINO_INSTALL_DIR}/lib/intel64/*) + file(COPY ${OPENVINO_LIB_FILES} DESTINATION ${OPENVINO_INSTALL_DIR}/lib/) + file(REMOVE_RECURSE ${OPENVINO_INSTALL_DIR}/lib/intel64) + endif() + + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/docs) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/install_dependencies) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/samples) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/setupvars.sh) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/${OPENVINO_FILENAME}/tools) + get_openvino_libs(${OPENVINO_INSTALL_DIR}) +endif() +message("OPENVINO_LIBS = ${OPENVINO_LIBS}") +list(APPEND DEPEND_LIBS ${OPENVINO_LIBS}) diff --git a/libs/ultrainfer/cmake/paddle2onnx.cmake b/libs/ultrainfer/cmake/paddle2onnx.cmake new file mode 100755 index 0000000000..3e5d2f4293 --- /dev/null +++ b/libs/ultrainfer/cmake/paddle2onnx.cmake @@ -0,0 +1,90 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +set(PADDLE2ONNX_PROJECT "extern_paddle2onnx") +set(PADDLE2ONNX_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle2onnx) +set(PADDLE2ONNX_SOURCE_DIR + ${THIRD_PARTY_PATH}/paddle2onnx/src/${PADDLE2ONNX_PROJECT}) +set(PADDLE2ONNX_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle2onnx) +set(PADDLE2ONNX_INC_DIR + "${PADDLE2ONNX_INSTALL_DIR}/include" + CACHE PATH "paddle2onnx include directory." FORCE) +set(PADDLE2ONNX_LIB_DIR + "${PADDLE2ONNX_INSTALL_DIR}/lib/" + CACHE PATH "paddle2onnx lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" + "${PADDLE2ONNX_LIB_DIR}") + +include_directories(BEFORE ${PADDLE2ONNX_INC_DIR}) +if(WIN32) + set(PADDLE2ONNX_COMPILE_LIB + "${PADDLE2ONNX_INSTALL_DIR}/lib/paddle2onnx.lib" + CACHE FILEPATH "paddle2onnx compile library." FORCE) +elseif(APPLE) + set(PADDLE2ONNX_COMPILE_LIB + "${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.dylib" + CACHE FILEPATH "paddle2onnx compile library." FORCE) +else() + set(PADDLE2ONNX_COMPILE_LIB + "${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.so" + CACHE FILEPATH "paddle2onnx compile library." 
FORCE) +endif(WIN32) + +if (NOT PADDLE2ONNX_URL) + # Use default paddle2onnx url if custom url is not setting + set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") + set(PADDLE2ONNX_VERSION "1.0.8rc") + if(WIN32) + set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip") + if(NOT CMAKE_CL_64) + set(PADDLE2ONNX_FILE "paddle2onnx-win-x86-${PADDLE2ONNX_VERSION}.zip") + endif() + elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + set(PADDLE2ONNX_FILE "paddle2onnx-osx-arm64-${PADDLE2ONNX_VERSION}.tgz") + else() + set(PADDLE2ONNX_FILE "paddle2onnx-osx-x86_64-${PADDLE2ONNX_VERSION}.tgz") + endif() + else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PADDLE2ONNX_FILE "paddle2onnx-linux-aarch64-${PADDLE2ONNX_VERSION}.tgz") + else() + set(PADDLE2ONNX_FILE "paddle2onnx-linux-x64-${PADDLE2ONNX_VERSION}.tgz") + endif() + endif() + set(PADDLE2ONNX_URL "${PADDLE2ONNX_URL_BASE}${PADDLE2ONNX_FILE}") +endif() + +ExternalProject_Add( + ${PADDLE2ONNX_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${PADDLE2ONNX_URL} + PREFIX ${PADDLE2ONNX_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E remove_directory ${PADDLE2ONNX_INSTALL_DIR} && + ${CMAKE_COMMAND} -E make_directory ${PADDLE2ONNX_INSTALL_DIR} && + ${CMAKE_COMMAND} -E rename ${PADDLE2ONNX_SOURCE_DIR}/lib/ + ${PADDLE2ONNX_LIB_DIR} && ${CMAKE_COMMAND} -E copy_directory + ${PADDLE2ONNX_SOURCE_DIR}/include ${PADDLE2ONNX_INC_DIR} + BUILD_BYPRODUCTS ${PADDLE2ONNX_COMPILE_LIB}) + +add_library(external_paddle2onnx STATIC IMPORTED GLOBAL) +set_property(TARGET external_paddle2onnx PROPERTY IMPORTED_LOCATION + ${PADDLE2ONNX_COMPILE_LIB}) +add_dependencies(external_paddle2onnx ${PADDLE2ONNX_PROJECT}) diff --git a/libs/ultrainfer/cmake/paddle_inference.cmake b/libs/ultrainfer/cmake/paddle_inference.cmake new file mode 100755 index 0000000000..abe562b50b --- /dev/null +++ b/libs/ultrainfer/cmake/paddle_inference.cmake @@ -0,0 +1,329 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +# The priority strategy for Paddle inference is as follows: +# PADDLEINFERENCE_DIRECTORY > custom PADDLEINFERENCE_URL > default PADDLEINFERENCE_URL. 
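+#
+# As a rough configure-time sketch of that priority order (the directory path and URL
+# below are hypothetical placeholders; only the option/cache-variable names come from
+# this file):
+#
+#   cmake .. -DENABLE_PADDLE_BACKEND=ON -DPADDLEINFERENCE_DIRECTORY=/opt/paddle_inference
+#   cmake .. -DENABLE_PADDLE_BACKEND=ON -DPADDLEINFERENCE_URL=https://example.com/paddle_inference.tgz -DPADDLEINFERENCE_VERSION=2.5.1
+#   cmake .. -DENABLE_PADDLE_BACKEND=ON
+#
+# The first form takes precedence over the second, which takes precedence over the
+# default URL selected below. Note that PADDLEINFERENCE_VERSION must be supplied
+# whenever the default download is bypassed, since the version check further down
+# rejects an empty value.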
+
+if(WITH_GPU AND WITH_IPU)
+  message(FATAL_ERROR "Cannot build with WITH_GPU=ON and WITH_IPU=ON at the same time.")
+endif()
+
+# Custom options for Paddle Inference backend
+option(PADDLEINFERENCE_DIRECTORY "Directory of custom Paddle Inference library" OFF)
+option(PADDLEINFERENCE_API_CUSTOM_OP "Whether building with custom paddle ops" OFF)
+option(PADDLEINFERENCE_API_COMPAT_2_4_x "Whether using Paddle Inference 2.4.x" OFF)
+option(PADDLEINFERENCE_API_COMPAT_2_5_x "Whether using Paddle Inference 2.5.x" OFF)
+option(PADDLEINFERENCE_API_COMPAT_2_6_x "Whether using Paddle Inference 2.6.x" OFF)
+option(PADDLEINFERENCE_API_COMPAT_DEV "Whether using Paddle Inference latest dev" OFF)
+option(PADDLEINFERENCE_API_COMPAT_CUDA_SM_80 "Whether using Paddle Inference with CUDA sm_80(A100)" OFF)
+
+set(PADDLEINFERENCE_URL "" CACHE STRING "URL of the custom Paddle Inference library")
+set(PADDLEINFERENCE_VERSION "" CACHE STRING "Paddle Inference version")
+
+set(PADDLEINFERENCE_PROJECT "extern_paddle_inference")
+set(PADDLEINFERENCE_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle_inference)
+set(PADDLEINFERENCE_SOURCE_DIR
+    ${THIRD_PARTY_PATH}/paddle_inference/src/${PADDLEINFERENCE_PROJECT})
+set(PADDLEINFERENCE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle_inference)
+
+set(PADDLEINFERENCE_INC_DIR "${PADDLEINFERENCE_INSTALL_DIR}"
+    CACHE PATH "paddle_inference include directory." FORCE)
+set(PADDLEINFERENCE_LIB_DIR
+    "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/"
+    CACHE PATH "paddle_inference lib directory." FORCE)
+set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}"
+    "${PADDLEINFERENCE_LIB_DIR}")
+
+if(PADDLEINFERENCE_DIRECTORY)
+  set(PADDLEINFERENCE_INC_DIR ${PADDLEINFERENCE_DIRECTORY})
+endif()
+
+include_directories(${PADDLEINFERENCE_INC_DIR})
+
+if(PADDLEINFERENCE_DIRECTORY)
+  # Use custom Paddle Inference libs.
+  if(EXISTS "${THIRD_PARTY_PATH}/install/paddle_inference")
+    file(REMOVE_RECURSE "${THIRD_PARTY_PATH}/install/paddle_inference")
+  endif()
+  if(NOT Python_EXECUTABLE)
+    find_package(Python COMPONENTS Interpreter Development REQUIRED)
+  endif()
+  message(STATUS "Copying ${PADDLEINFERENCE_DIRECTORY} to ${THIRD_PARTY_PATH}/install/paddle_inference ...")
+  if(WIN32)
+    execute_process(COMMAND mkdir -p ${THIRD_PARTY_PATH}/install)
+    execute_process(COMMAND cp -r ${PADDLEINFERENCE_DIRECTORY} ${THIRD_PARTY_PATH}/install/paddle_inference)
+  else()
+    execute_process(COMMAND mkdir -p ${THIRD_PARTY_PATH}/install)
+    execute_process(COMMAND cp -r ${PADDLEINFERENCE_DIRECTORY} ${THIRD_PARTY_PATH}/install/paddle_inference)
+    execute_process(COMMAND rm -rf ${THIRD_PARTY_PATH}/install/paddle_inference/paddle/lib/*.a)
+  endif()
+else()
+
+  # Custom Paddle Inference URL
+  if (NOT PADDLEINFERENCE_URL)
+
+    # Use default Paddle Inference libs.
+ set(PADDLEINFERENCE_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") + if(WIN32) + if (WITH_GPU) + set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-gpu-trt8.5.2.2-mkl-2.5.0.281761089e.zip") + set(PADDLEINFERENCE_VERSION "2.5.0.281761089e") + else() + set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-mkl-2.5.0.281761089e.zip") + set(PADDLEINFERENCE_VERSION "2.5.0.281761089e") + endif() + elseif(APPLE) + if(CURRENT_OSX_ARCH MATCHES "arm64") + message(FATAL_ERROR "Paddle Backend doesn't support Mac OSX with Arm64 now.") + set(PADDLEINFERENCE_FILE "paddle_inference-osx-arm64-openblas-0.0.0.660f781b77.tgz") + else() + # TODO(qiuyanjun): Should remove this old paddle inference lib + # set(PADDLEINFERENCE_FILE "paddle_inference-osx-x86_64-2.4-dev3.tgz") + set(PADDLEINFERENCE_FILE "paddle_inference-osx-x86_64-openblas-0.0.0.660f781b77.tgz") + endif() + set(PADDLEINFERENCE_VERSION "0.0.0.660f781b77") + else() + # Linux with x86/aarch64 CPU/Arm CPU/GPU/IPU ... + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + message(FATAL_ERROR "Paddle Backend doesn't support linux aarch64 now.") + else() + # x86_64 + if(WITH_GPU) + if(PADDLEINFERENCE_API_COMPAT_CUDA_SM_80) + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-gpu-trt8.5.2.2-mkl-sm70.sm75.sm80.sm86.nodist-2.5.1.tgz") + set(PADDLEINFERENCE_VERSION "2.5.1") + else() + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-gpu-trt8.5.2.2-mkl-sm61.sm70.sm75.sm86.nodist-2.5.1.tgz") + set(PADDLEINFERENCE_VERSION "2.5.1") + endif() + else() + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-mkl-2.5.1.tgz") + set(PADDLEINFERENCE_VERSION "2.5.1") + endif() + if(WITH_IPU) + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-ipu-2.4-dev1.tgz") + # TODO(qiuyanjun): Should use the commit id to tag the version + set(PADDLEINFERENCE_VERSION "2.4-dev1") + endif() + if(WITH_KUNLUNXIN) + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-xpu-openblas-0.0.0.021fd73536.tgz") + set(PADDLEINFERENCE_VERSION "0.0.0.021fd73536") + endif() + + if(NEED_ABI0) + if(WITH_GPU OR WITH_IPU OR WITH_KUNLUNXIN) + message(WARNING "While NEED_ABI0=ON, only support CPU now, will fallback to CPU.") + endif() + set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-2.4.0-abi0.tgz") + set(PADDLEINFERENCE_VERSION "2.4.0-abi0") + endif() + endif() + endif() + set(PADDLEINFERENCE_URL "${PADDLEINFERENCE_URL_BASE}${PADDLEINFERENCE_FILE}") + + endif(PADDLEINFERENCE_URL) + + ExternalProject_Add( + ${PADDLEINFERENCE_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${PADDLEINFERENCE_URL} + PREFIX ${PADDLEINFERENCE_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy_directory ${PADDLEINFERENCE_SOURCE_DIR} ${PADDLEINFERENCE_INSTALL_DIR} + BUILD_BYPRODUCTS ${PADDLEINFERENCE_COMPILE_LIB}) + +endif(PADDLEINFERENCE_DIRECTORY) + +if (PADDLEINFERENCE_VERSION STREQUAL "") + message(FATAL_ERROR "The Paddle Inference version is unspecified and cannot be determined.") +endif() +string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ "${PADDLEINFERENCE_VERSION}") +set(PADDLEINFERENCE_VERSION_MAJOR "${CMAKE_MATCH_1}") +set(PADDLEINFERENCE_VERSION_MINOR "${CMAKE_MATCH_2}") +set(PADDLEINFERENCE_VERSION_PATCH "${CMAKE_MATCH_3}") +add_definitions("-DPADDLEINFERENCE_VERSION_MAJOR=${PADDLEINFERENCE_VERSION_MAJOR}") +add_definitions("-DPADDLEINFERENCE_VERSION_MINOR=${PADDLEINFERENCE_VERSION_MINOR}") +add_definitions("-DPADDLEINFERENCE_VERSION_PATCH=${PADDLEINFERENCE_VERSION_PATCH}") + +# check libs +if(WIN32) + 
set(PADDLEINFERENCE_COMPILE_LIB + "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/paddle_inference.lib" + CACHE FILEPATH "paddle_inference compile library." FORCE) + if(PADDLEINFERENCE_VERSION_MAJOR EQUAL 2) + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/mkldnn.lib") + else() + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onednn/lib/dnnl.lib") + endif() + set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5md.lib") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/paddle2onnx.lib") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/onnxruntime.lib") +elseif(APPLE) + set(PADDLEINFERENCE_COMPILE_LIB + "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.dylib" + CACHE FILEPATH "paddle_inference compile library." FORCE) + if(PADDLEINFERENCE_VERSION_MAJOR EQUAL 2) + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/libdnnl.so.2") + else() + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onednn/lib/libdnnl.so.3") + endif() + set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5.so") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.dylib") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.dylib") +else() + set(PADDLEINFERENCE_COMPILE_LIB + "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.so" + CACHE FILEPATH "paddle_inference compile library." FORCE) + if(PADDLEINFERENCE_VERSION_MAJOR EQUAL 2) + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/libdnnl.so.2") + else() + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onednn/lib/libdnnl.so.3") + endif() + set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5.so") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so") +endif(WIN32) + +# Path Paddle Inference ELF lib file +if(UNIX AND (NOT APPLE)) + set(PATCHELF_SCRIPT ${PROJECT_SOURCE_DIR}/scripts/patch_paddle_inference.py) + set(PATCHELF_TARGET ${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.so) + add_custom_target( + patchelf_paddle_inference ALL COMMAND bash -c + "PATCHELF_EXE=${PATCHELF_EXE} python ${PATCHELF_SCRIPT} ${PATCHELF_TARGET} ${PADDLEINFERENCE_VERSION}" + DEPENDS ${LIBRARY_NAME} + ) + unset(PATCHELF_SCRIPT) + unset(PATCHELF_TARGET) +endif() + +add_library(external_paddle_inference STATIC IMPORTED GLOBAL) +set_property(TARGET external_paddle_inference PROPERTY IMPORTED_LOCATION + ${PADDLEINFERENCE_COMPILE_LIB}) +add_dependencies(external_paddle_inference ${PADDLEINFERENCE_PROJECT}) + + +add_library(external_p2o STATIC IMPORTED GLOBAL) +set_property(TARGET external_p2o PROPERTY IMPORTED_LOCATION + ${P2O_LIB}) +add_dependencies(external_p2o ${PADDLEINFERENCE_PROJECT}) + +add_library(external_ort STATIC IMPORTED GLOBAL) +set_property(TARGET external_ort PROPERTY IMPORTED_LOCATION + ${ORT_LIB}) +add_dependencies(external_ort ${PADDLEINFERENCE_PROJECT}) + +add_library(external_dnnl STATIC IMPORTED GLOBAL) +set_property(TARGET external_dnnl PROPERTY IMPORTED_LOCATION + ${DNNL_LIB}) +add_dependencies(external_dnnl ${PADDLEINFERENCE_PROJECT}) + +add_library(external_omp STATIC IMPORTED GLOBAL) +set_property(TARGET external_omp PROPERTY 
IMPORTED_LOCATION + ${OMP_LIB}) +add_dependencies(external_omp ${PADDLEINFERENCE_PROJECT}) + +# Compatible policy for 2.4.x/2.5.x/2.6.x and latest dev. +if (NOT WITH_KUNLUNXIN) + string(REGEX MATCH "0.0.0" PADDLEINFERENCE_USE_DEV ${PADDLEINFERENCE_VERSION}) + string(REGEX MATCH "2.4|post24|post2.4" PADDLEINFERENCE_USE_2_4_x ${PADDLEINFERENCE_VERSION}) + string(REGEX MATCH "2.5|post25|post2.5" PADDLEINFERENCE_USE_2_5_x ${PADDLEINFERENCE_VERSION}) + string(REGEX MATCH "2.6|post26|post2.6" PADDLEINFERENCE_USE_2_6_x ${PADDLEINFERENCE_VERSION}) +endif() + +if(PADDLEINFERENCE_USE_DEV) + set(PADDLEINFERENCE_API_COMPAT_DEV ON CACHE BOOL "" FORCE) +endif() + +if(PADDLEINFERENCE_USE_2_6_x) + set(PADDLEINFERENCE_API_COMPAT_2_6_x ON CACHE BOOL "" FORCE) +endif() + +if(PADDLEINFERENCE_USE_2_5_x) + set(PADDLEINFERENCE_API_COMPAT_2_5_x ON CACHE BOOL "" FORCE) +endif() + +if(PADDLEINFERENCE_USE_2_4_x AND (NOT PADDLEINFERENCE_API_COMPAT_2_5_x) AND (NOT PADDLEINFERENCE_API_COMPAT_2_6_x) AND (NOT PADDLEINFERENCE_API_COMPAT_DEV)) + set(PADDLEINFERENCE_API_COMPAT_2_4_x ON CACHE BOOL "" FORCE) + message(WARNING "You are using PADDLEINFERENCE_USE_2_4_x:${PADDLEINFERENCE_VERSION}, force PADDLEINFERENCE_API_COMPAT_2_4_x=ON") +endif() + +if(PADDLEINFERENCE_API_COMPAT_2_4_x) + add_definitions(-DPADDLEINFERENCE_API_COMPAT_2_4_x) +endif() + +if(PADDLEINFERENCE_API_COMPAT_2_5_x) + add_definitions(-DPADDLEINFERENCE_API_COMPAT_2_5_x) +endif() + +if(PADDLEINFERENCE_API_COMPAT_2_6_x) + add_definitions(-DPADDLEINFERENCE_API_COMPAT_2_6_x) +endif() + +if(PADDLEINFERENCE_API_COMPAT_DEV) + add_definitions(-DPADDLEINFERENCE_API_COMPAT_DEV) +endif() + +# Compatible policy for custom paddle ops +if(PADDLEINFERENCE_API_COMPAT_2_5_x AND (NOT WITH_KUNLUNXIN)) + # no c++ standard policy conflicts vs c++ 11 + # TODO: support custom ops for latest dev + set(PADDLEINFERENCE_API_CUSTOM_OP ON CACHE BOOL "" FORCE) + # add paddle_inference/paddle/include path for custom ops + # the extension.h and it's deps headers are located in + # paddle/include/paddle directory. 
+ include_directories(${PADDLEINFERENCE_INC_DIR}/paddle/include) + message(WARNING "You are using PADDLEINFERENCE_API_COMPAT_2_5_x:${PADDLEINFERENCE_VERSION}, force PADDLEINFERENCE_API_CUSTOM_OP=${PADDLEINFERENCE_API_CUSTOM_OP}") +endif() + +function(set_paddle_custom_ops_compatible_policy) + if(PADDLEINFERENCE_API_CUSTOM_OP AND (NOT WITH_KUNLUNXIN)) + if(NOT MSVC) + # TODO: add non c++ 14 policy for latest dev + if(NOT PADDLEINFERENCE_API_COMPAT_2_5_x) + # gcc c++ 14 policy for 2.4.x + if(NOT DEFINED CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 14 PARENT_SCOPE) + message(WARNING "Found PADDLEINFERENCE_API_CUSTOM_OP=ON, but CMAKE_CXX_STANDARD is not defined, use c++ 14 by default!") + elseif(NOT (CMAKE_CXX_STANDARD EQUAL 14)) + set(CMAKE_CXX_STANDARD 14 PARENT_SCOPE) + message(WARNING "Found PADDLEINFERENCE_API_CUSTOM_OP=ON, force use c++ 14!") + endif() + endif() + if(WITH_GPU) + # cuda c++ 14 policy for 2.4.x + if(NOT PADDLEINFERENCE_API_COMPAT_2_5_x) + if(NOT DEFINED CMAKE_CUDA_STANDARD) + set(CMAKE_CUDA_STANDARD 14 PARENT_SCOPE) + message(WARNING "Found PADDLEINFERENCE_API_CUSTOM_OP=ON and WITH_GPU=ON, but CMAKE_CUDA_STANDARD is not defined, use c++ 14 by default!") + elseif(NOT (CMAKE_CUDA_STANDARD EQUAL 14)) + set(CMAKE_CUDA_STANDARD 14 PARENT_SCOPE) + message(WARNING "Found PADDLEINFERENCE_API_CUSTOM_OP=ON and WITH_GPU=ON, force use c++ 14!") + endif() + endif() + endif() + endif() + # common compile flags for paddle custom ops + add_definitions(-DPADDLE_ON_INFERENCE) + add_definitions(-DPADDLE_NO_PYTHON) + if(WITH_GPU) + add_definitions(-DPADDLE_WITH_CUDA) + endif() + endif() +endfunction() diff --git a/libs/ultrainfer/cmake/paddlelite.cmake b/libs/ultrainfer/cmake/paddlelite.cmake new file mode 100755 index 0000000000..6042ea14c2 --- /dev/null +++ b/libs/ultrainfer/cmake/paddlelite.cmake @@ -0,0 +1,105 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(ExternalProject) + +option(PADDLELITE_DIRECTORY "Directory of custom Paddle-Lite library" OFF) + +set(PADDLELITE_PROJECT "extern_paddlelite") +set(PADDLELITE_FILENAME paddlelite) +set(PADDLELITE_PREFIX_DIR ${THIRD_PARTY_PATH}/${PADDLELITE_FILENAME}) +set(PADDLELITE_SOURCE_DIR + ${THIRD_PARTY_PATH}/${PADDLELITE_FILENAME}/src/${PADDLELITE_PROJECT}) +set(PADDLELITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${PADDLELITE_FILENAME}) +set(PADDLELITE_INC_DIR + "${PADDLELITE_INSTALL_DIR}/include" + CACHE PATH "paddlelite include directory." FORCE) +set(PADDLELITE_LIB_DIR +"${PADDLELITE_INSTALL_DIR}/lib/" +CACHE PATH "paddlelite lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${PADDLELITE_LIB_DIR}") + +if(PADDLELITE_DIRECTORY) + set(PADDLELITE_INC_DIR "${PADDLELITE_DIRECTORY}/include") +endif() + +include_directories(${PADDLELITE_INC_DIR}) + +if(PADDLELITE_DIRECTORY) + # Use custom Paddle-Lite libs. 
+ if(EXISTS "${THIRD_PARTY_PATH}/install/paddlelite") + file(REMOVE_RECURSE "${THIRD_PARTY_PATH}/install/paddlelite") + endif() + if(WIN32 OR APPLE OR IOS) + message(FATAL_ERROR "Doesn't support windows/mac/ios platform with backend Paddle Lite now.") + else() + message(STATUS "Copying ${PADDLELITE_DIRECTORY} to ${THIRD_PARTY_PATH}/install/paddlelite ...") + execute_process(COMMAND mkdir -p ${THIRD_PARTY_PATH}/install/paddlelite) + execute_process(COMMAND cp -r ${PADDLELITE_DIRECTORY}/include ${THIRD_PARTY_PATH}/install/paddlelite) + execute_process(COMMAND mkdir -p ${PADDLELITE_LIB_DIR}) + execute_process(COMMAND cp -r ${PADDLELITE_DIRECTORY}/lib/ ${PADDLELITE_LIB_DIR}) + message(${PADDLELITE_LIB_DIR}) + execute_process(COMMAND rm -rf ${PADDLELITE_LIB_DIR}/*.a) + set(PADDLELITE_LIB "${PADDLELITE_LIB_DIR}/libpaddle_full_api_shared.so") + endif() + +else() + # Use default Paddle-Lite libs. + set(PADDLELITE_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs") + + if(WIN32 OR APPLE OR IOS) + message(FATAL_ERROR "Doesn't support windows/mac/ios platform with backend Paddle Lite now.") + else() + set(PADDLELITE_LIB "${PADDLELITE_LIB_DIR}/libpaddle_full_api_shared.so") + set(PADDLELITE_REMOVE_LIB "${PADDLELITE_LIB_DIR}/libpaddle_api_full_bundled.a") + endif() + + if(NOT PADDLELITE_URL) + if(WIN32 OR APPLE OR IOS) + message(FATAL_ERROR "Doesn't support windows/mac/ios platform with backend Paddle Lite now.") + else() # Linux + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(PADDLELITE_URL "${PADDLELITE_URL_PREFIX}/lite-linux-arm64-20221209.tgz") + set(PADDLELITE_VERSION 0.0.0.20221209) + else() + message(FATAL_ERROR "Only support Linux aarch64 now, x64 is not supported with backend Paddle Lite.") + endif() + endif() + endif() + + ExternalProject_Add( + ${PADDLELITE_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${PADDLELITE_URL} + PREFIX ${PADDLELITE_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E remove_directory ${PADDLELITE_INSTALL_DIR} && + ${CMAKE_COMMAND} -E make_directory ${PADDLELITE_INSTALL_DIR} && + ${CMAKE_COMMAND} -E rename ${PADDLELITE_SOURCE_DIR}/lib/ ${PADDLELITE_INSTALL_DIR}/lib && + ${CMAKE_COMMAND} -E copy_directory ${PADDLELITE_SOURCE_DIR}/include ${PADDLELITE_INC_DIR} + BUILD_BYPRODUCTS ${PADDLELITE_LIB}) + +endif(PADDLELITE_DIRECTORY) + +if(UNIX AND (NOT APPLE) AND BUILD_ULTRAINFER_PYTHON) + add_custom_target(patchelf_paddle_lite ALL COMMAND bash -c "PATCHELF_EXE=${PATCHELF_EXE} python ${PROJECT_SOURCE_DIR}/scripts/patch_paddle_lite.py ${PADDLELITE_INSTALL_DIR}/lib/" DEPENDS ${LIBRARY_NAME}) +endif() + +add_library(external_paddle_lite STATIC IMPORTED GLOBAL) +set_property(TARGET external_paddle_lite PROPERTY IMPORTED_LOCATION ${PADDLELITE_LIB}) +add_dependencies(external_paddle_lite ${PADDLELITE_PROJECT}) diff --git a/libs/ultrainfer/cmake/poros.cmake b/libs/ultrainfer/cmake/poros.cmake new file mode 100755 index 0000000000..edfd7a3909 --- /dev/null +++ b/libs/ultrainfer/cmake/poros.cmake @@ -0,0 +1,95 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +if(NOT ENABLE_TRT_BACKEND) + message(FATAL_ERROR "While ENABLE_POROS_BACKEND, requires ENABLE_TRT_BACKEND=ON, but now its OFF.") +endif() + +set(POROS_PROJECT "extern_poros") +set(POROS_PREFIX_DIR ${THIRD_PARTY_PATH}/poros) +set(POROS_SOURCE_DIR + ${THIRD_PARTY_PATH}/poros/src/${POROS_PROJECT}) +set(POROS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/poros) +set(POROS_INC_DIR + "${POROS_INSTALL_DIR}/include" + CACHE PATH "poros include directory." FORCE) +set(POROS_LIB_DIR + "${POROS_INSTALL_DIR}/lib/" + CACHE PATH "poros lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" + "${POROS_LIB_DIR}") + +include_directories(${POROS_INC_DIR}) +if(WIN32) + message(FATAL_ERROR "Poros Backend doesn't support Windows now.") +elseif(APPLE) + message(FATAL_ERROR "Poros Backend doesn't support Mac OSX now.") +else() + set(POROS_COMPILE_LIB + "${POROS_INSTALL_DIR}/lib/libporos.so" + CACHE FILEPATH "poros compile library." FORCE) +endif(WIN32) + +set(POROS_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") +set(POROS_VERSION "0.1.0") +if(WIN32) + message(FATAL_ERROR "Poros Backend doesn't support Windows now.") +elseif(APPLE) + message(FATAL_ERROR "Poros Backend doesn't support Mac OSX now.") +else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + message(FATAL_ERROR "Poros Backend doesn't support linux aarch64 now.") + else() + if(WITH_GPU) + set(POROS_FILE "poros_manylinux_torch1.12.1_cu116_trt8.4_gcc82-${POROS_VERSION}.tar.gz") + else() + message(FATAL_ERROR "Poros currently only provides precompiled packages for the GPU version.") + endif() + endif() +endif() +set(POROS_URL "${POROS_URL_BASE}${POROS_FILE}") + +ExternalProject_Add( + ${POROS_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${POROS_URL} + PREFIX ${POROS_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy_directory ${POROS_SOURCE_DIR} ${POROS_INSTALL_DIR} + BUILD_BYPRODUCTS ${POROS_COMPILE_LIB}) + +add_library(external_poros STATIC IMPORTED GLOBAL) +set_property(TARGET external_poros PROPERTY IMPORTED_LOCATION + ${POROS_COMPILE_LIB}) +add_dependencies(external_poros ${POROS_PROJECT}) + +# Download libtorch.so with ABI=1 +set(TORCH_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") +set(TORCH_FILE "libtorch-cxx11-abi-shared-with-deps-1.12.1-cu116.zip") +set(TORCH_URL "${TORCH_URL_BASE}${TORCH_FILE}") +message(STATUS "Use the default Torch lib from: ${TORCH_URL}") +download_and_decompress(${TORCH_URL} ${CMAKE_CURRENT_BINARY_DIR}/${TORCH_FILE} ${THIRD_PARTY_PATH}/install) +if(EXISTS ${THIRD_PARTY_PATH}/install/torch) + file(REMOVE_RECURSE ${THIRD_PARTY_PATH}/install/torch) +endif() +file(RENAME ${THIRD_PARTY_PATH}/install/libtorch/ ${THIRD_PARTY_PATH}/install/torch) +set(TORCH_INCLUDE_DIRS ${THIRD_PARTY_PATH}/install/torch/include) +find_library(TORCH_LIBRARY torch ${THIRD_PARTY_PATH}/install/torch/lib NO_DEFAULT_PATH) +include_directories(${TORCH_INCLUDE_DIRS}) +list(APPEND DEPEND_LIBS ${TORCH_LIBRARY}) diff --git a/libs/ultrainfer/cmake/rknpu2.cmake b/libs/ultrainfer/cmake/rknpu2.cmake 
new file mode 100755 index 0000000000..44bc43fe65 --- /dev/null +++ b/libs/ultrainfer/cmake/rknpu2.cmake @@ -0,0 +1,19 @@ +# get RKNPU2_URL +set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") +set(RKNPU2_VERSION "1.4.2b0") +set(RKNPU2_FILE "rknpu2_runtime-linux-aarch64-${RKNPU2_VERSION}-${RKNN2_TARGET_SOC}.tgz") +set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}") + +# download_and_decompress +download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE} ${THIRD_PARTY_PATH}/install/) + +# set path +set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime) + +# include lib +if (EXISTS ${RKNPU_RUNTIME_PATH}) + set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/lib/librknnrt.so) + include_directories(${RKNPU_RUNTIME_PATH}/include) +else () + message(FATAL_ERROR "[rknpu2.cmake] RKNPU_RUNTIME_PATH does not exist.") +endif () diff --git a/libs/ultrainfer/cmake/sophgo.cmake b/libs/ultrainfer/cmake/sophgo.cmake new file mode 100755 index 0000000000..1e6706dbcf --- /dev/null +++ b/libs/ultrainfer/cmake/sophgo.cmake @@ -0,0 +1,7 @@ +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +find_package(libsophon REQUIRED) +message(${LIBSOPHON_LIB_DIRS}) +include_directories(${LIBSOPHON_INCLUDE_DIRS}) +message(${LIBSOPHON_LIB_DIRS}) +set(SOPHGO_RT_LIB ${LIBSOPHON_LIB_DIRS}/libbmrt.so) diff --git a/libs/ultrainfer/cmake/summary.cmake b/libs/ultrainfer/cmake/summary.cmake new file mode 100755 index 0000000000..282a0396e7 --- /dev/null +++ b/libs/ultrainfer/cmake/summary.cmake @@ -0,0 +1,84 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
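+#
+# The ultrainfer_summary() helper defined below only reports the configuration that the
+# other cmake/*.cmake modules have resolved. As a sketch (the exact call site is an
+# assumption and is not shown in this patch), the top-level CMakeLists.txt would consume
+# it roughly as follows:
+#
+#   include(${PROJECT_SOURCE_DIR}/cmake/summary.cmake)
+#   ultrainfer_summary()   # call after all ENABLE_*/WITH_* options and backend versions are set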
+ +function(ultrainfer_summary) + message(STATUS "") + message(STATUS "*************UltraInfer Building Summary**********") + message(STATUS " CMake version : ${CMAKE_VERSION}") + message(STATUS " CMake command : ${CMAKE_COMMAND}") + message(STATUS " System : ${CMAKE_SYSTEM_NAME}") + message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") + message(STATUS " C++ standard : ${CMAKE_CXX_STANDARD}") + message(STATUS " C++ cuda standard : ${CMAKE_CUDA_STANDARD}") + message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") + message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}") + message(STATUS " EXE linker flags : ${CMAKE_EXE_LINKER_FLAGS}") + message(STATUS " Shared linker flags : ${CMAKE_SHARED_LINKER_FLAGS}") + message(STATUS " Build type : ${CMAKE_BUILD_TYPE}") + get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} COMPILE_DEFINITIONS) + message(STATUS " Compile definitions : ${tmp}") + message(STATUS " CMAKE_PREFIX_PATH : ${CMAKE_PREFIX_PATH}") + message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}") + message(STATUS " CMAKE_MODULE_PATH : ${CMAKE_MODULE_PATH}") + message(STATUS "") + message(STATUS " UltraInfer version : ${ULTRAINFER_VERSION}") + message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}") + message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}") + message(STATUS " ENABLE_HORIZON_BACKEND : ${ENABLE_HORIZON_BACKEND}") + message(STATUS " ENABLE_SOPHGO_BACKEND : ${ENABLE_SOPHGO_BACKEND}") + message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}") + message(STATUS " ENABLE_LITE_BACKEND : ${ENABLE_LITE_BACKEND}") + message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}") + message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}") + message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}") + message(STATUS " ENABLE_TVM_BACKEND : ${ENABLE_TVM_BACKEND}") + message(STATUS " ENABLE_BENCHMARK : ${ENABLE_BENCHMARK}") + message(STATUS " ENABLE_VISION : ${ENABLE_VISION}") + message(STATUS " ENABLE_TEXT : ${ENABLE_TEXT}") + message(STATUS " ENABLE_FLYCV : ${ENABLE_FLYCV}") + message(STATUS " ENABLE_CVCUDA : ${ENABLE_CVCUDA}") + message(STATUS " WITH_GPU : ${WITH_GPU}") + message(STATUS " WITH_IPU : ${WITH_IPU}") + message(STATUS " WITH_OPENCL : ${WITH_OPENCL}") + message(STATUS " WITH_TESTING : ${WITH_TESTING}") + message(STATUS " WITH_ASCEND : ${WITH_ASCEND}") + message(STATUS " WITH_DIRECTML : ${WITH_DIRECTML}") + message(STATUS " WITH_TIMVX : ${WITH_TIMVX}") + message(STATUS " WITH_KUNLUNXIN : ${WITH_KUNLUNXIN}") + message(STATUS " WITH_CAPI : ${WITH_CAPI}") + message(STATUS " WITH_CSHARPAPI : ${WITH_CSHARPAPI}") + if(ENABLE_ORT_BACKEND) + message(STATUS " ONNXRuntime version : ${ONNXRUNTIME_VERSION}") + endif() + if(ENABLE_PADDLE_BACKEND) + message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}") + endif() + if(ENABLE_POROS_BACKEND) + message(STATUS " Poros version : ${POROS_VERSION}") + endif() + if(ENABLE_OPENVINO_BACKEND) + message(STATUS " OpenVINO version : ${OPENVINO_VERSION}") + endif() + if(WITH_GPU) + message(STATUS " CUDA_DIRECTORY : ${CUDA_DIRECTORY}") + message(STATUS " TRT_DRECTORY : ${TRT_DIRECTORY}") + endif() + if (${BUILD_ULTRAINFER_PYTHON}) + message(STATUS " Python executable : ${PYTHON_EXECUTABLE}") + message(STATUS " Python includes : ${PYTHON_INCLUDE_DIR}") + endif() + if(ENABLE_LITE_BACKEND) + message(STATUS " Paddle Lite version : ${PADDLELITE_VERSION}") + endif() +endfunction() diff --git a/libs/ultrainfer/cmake/timvx.cmake b/libs/ultrainfer/cmake/timvx.cmake new 
file mode 100755 index 0000000000..6299a3dead --- /dev/null +++ b/libs/ultrainfer/cmake/timvx.cmake @@ -0,0 +1,38 @@ + +if(NOT ${ENABLE_LITE_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_LITE_BACKEND=ON") + set(ENABLE_LITE_BACKEND ON) +endif() +if(${ENABLE_PADDLE2ONNX}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE2ONNX=OFF") + set(ENABLE_PADDLE2ONNX OFF) +endif() +if(${ENABLE_ORT_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_ORT_BACKEND=OFF") + set(ENABLE_ORT_BACKEND OFF) +endif() +if(${ENABLE_PADDLE_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE_BACKEND=OFF") + set(ENABLE_PADDLE_BACKEND OFF) +endif() +if(${ENABLE_OPENVINO_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_OPENVINO_BACKEND=OFF") + set(ENABLE_OPENVINO_BACKEND OFF) +endif() +if(${ENABLE_TRT_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_TRT_BACKEND=OFF") + set(ENABLE_TRT_BACKEND OFF) +endif() + +if(${WITH_GPU}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DWITH_GPU=OFF") + set(WITH_GPU OFF) +endif() + +if(${ENABLE_TEXT}) + set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE) + message(STATUS "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_TEXT=OFF") +endif() + +install(FILES ${PROJECT_SOURCE_DIR}/cmake/timvx.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) +install(FILES ${PROJECT_SOURCE_DIR}/cmake/toolchain.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) diff --git a/libs/ultrainfer/cmake/toolchain.cmake b/libs/ultrainfer/cmake/toolchain.cmake new file mode 100755 index 0000000000..c0a7edc81f --- /dev/null +++ b/libs/ultrainfer/cmake/toolchain.cmake @@ -0,0 +1,45 @@ +if (DEFINED TARGET_ABI) + set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_BUILD_TYPE MinSizeRel) + if(${TARGET_ABI} MATCHES "armhf") + set(CMAKE_SYSTEM_PROCESSOR arm) + if(NOT CMAKE_C_COMPILER) + set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc") + endif() + if(NOT CMAKE_CXX_COMPILER) + set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++") + endif() + set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}") + set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}" ) + set(OPENCV_URL "https://bj.bcebos.com/fastdeploy/third_libs/opencv-linux-armv7hf-4.6.0.tgz") + set(OPENCV_FILENAME "opencv-linux-armv7hf-4.6.0") + if(WITH_TIMVX) + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-armhf-timvx-20230316.tgz") + else() + message(STATUS "PADDLELITE_URL will be configured if WITH_TIMVX=ON.") + endif() + set(THIRD_PARTY_PATH ${CMAKE_CURRENT_BINARY_DIR}/third_libs) + set(OpenCV_DIR ${THIRD_PARTY_PATH}/install/opencv/lib/cmake/opencv4) + elseif(${TARGET_ABI} MATCHES "arm64") + set(CMAKE_SYSTEM_PROCESSOR aarch64) + if(NOT CMAKE_C_COMPILER) + set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc") + endif() + if(NOT CMAKE_CXX_COMPILER) + set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++") + endif() + set(CMAKE_CXX_FLAGS "-march=armv8-a ${CMAKE_CXX_FLAGS}") + set(CMAKE_C_FLAGS "-march=armv8-a ${CMAKE_C_FLAGS}") + set(OPENCV_URL "https://bj.bcebos.com/fastdeploy/third_libs/opencv-linux-aarch64-4.6.0.tgz") + set(OPENCV_FILENAME "opencv-linux-aarch64-4.6.0") + if(WITH_TIMVX) + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-aarch64-timvx-20230316.tgz") + else() + 
set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-arm64-20230316.tgz") + endif() + set(THIRD_PARTY_PATH ${CMAKE_CURRENT_BINARY_DIR}/third_libs) + set(OpenCV_DIR ${THIRD_PARTY_PATH}/install/opencv/lib/cmake/opencv4) + else() + message(FATAL_ERROR "When cross-compiling, please set the -DTARGET_ABI to arm64 or armhf.") + endif() +endif() diff --git a/libs/ultrainfer/cmake/tvm.cmake b/libs/ultrainfer/cmake/tvm.cmake new file mode 100755 index 0000000000..b6dfbebcbb --- /dev/null +++ b/libs/ultrainfer/cmake/tvm.cmake @@ -0,0 +1,55 @@ +# set path + +set(TVM_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") +set(TVM_VERSION "0.12.0") +set(TVM_SYSTEM "") + +if (${CMAKE_SYSTEM} MATCHES "Darwin") + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64") + set(TVM_SYSTEM "macos-arm64") + endif () +elseif (${CMAKE_SYSTEM} MATCHES "Linux") + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86") + set(TVM_SYSTEM "linux-x86") + endif () +else () + error("TVM only support MacOS in Arm64 or linux in x86") +endif () +set(TVM_FILE "tvm-${TVM_SYSTEM}-${TVM_VERSION}.tgz") +set(TVM_URL "${TVM_URL_BASE}${TVM_FILE}") + +set(TVM_RUNTIME_PATH "${THIRD_PARTY_PATH}/install/tvm") +execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${TVM_RUNTIME_PATH}") +download_and_decompress(${TVM_URL} + "${CMAKE_CURRENT_BINARY_DIR}/${TVM_FILE}" + "${THIRD_PARTY_PATH}/install/") +include_directories(${TVM_RUNTIME_PATH}/include) + +# copy dlpack to third_party +set(DLPACK_PATH "${THIRD_PARTY_PATH}/install/dlpack") +execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${DLPACK_PATH}") +execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory + "${PROJECT_SOURCE_DIR}/third_party/dlpack" + "${THIRD_PARTY_PATH}/install/dlpack") +include_directories(${DLPACK_PATH}/include) + +set(DMLC_CORE_PATH "${THIRD_PARTY_PATH}/install/dmlc-core") +execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${DMLC_CORE_PATH}") +set(DMLC_CORE_URL https://bj.bcebos.com/fastdeploy/third_libs/dmlc-core.tgz) +download_and_decompress(${DMLC_CORE_URL} + "${CMAKE_CURRENT_BINARY_DIR}/dmlc-core.tgz" + "${THIRD_PARTY_PATH}/install/") +include_directories(${DMLC_CORE_PATH}/include) + +# include lib +if (EXISTS ${TVM_RUNTIME_PATH}) + if (${CMAKE_SYSTEM} MATCHES "Darwin") + set(TVM_RUNTIME_LIB ${TVM_RUNTIME_PATH}/lib/libtvm_runtime.dylib) + elseif (${CMAKE_SYSTEM} MATCHES "Linux") + set(TVM_RUNTIME_LIB ${TVM_RUNTIME_PATH}/lib/libtvm_runtime.so) + endif () + include(${TVM_RUNTIME_PATH}/lib/cmake/tvm/tvmConfig.cmake) + add_definitions(-DDMLC_USE_LOGGING_LIBRARY=) +else () + error(FATAL_ERROR "[tvm.cmake] TVM_RUNTIME_PATH does not exist.") +endif () diff --git a/libs/ultrainfer/cmake/utils.cmake b/libs/ultrainfer/cmake/utils.cmake new file mode 100755 index 0000000000..07c492d2ff --- /dev/null +++ b/libs/ultrainfer/cmake/utils.cmake @@ -0,0 +1,223 @@ +# This function comes from https://blog.csdn.net/yindongjie1221/article/details/90614261 +function(redefine_file_macro targetname) + get_target_property(source_files "${targetname}" SOURCES) + foreach(sourcefile ${source_files}) + get_property(defs SOURCE "${sourcefile}" + PROPERTY COMPILE_DEFINITIONS) + get_filename_component(filepath "${sourcefile}" ABSOLUTE) + string(REPLACE ${PROJECT_SOURCE_DIR}/ "" relpath ${filepath}) + list(APPEND defs "__REL_FILE__=\"${relpath}\"") + set_property( + SOURCE "${sourcefile}" + PROPERTY COMPILE_DEFINITIONS ${defs} + ) + endforeach() +endfunction() + +function(download_and_decompress url filename decompress_dir) + if(NOT EXISTS ${filename}) + 
message("Downloading file from ${url} to ${filename} ...") + file(DOWNLOAD ${url} "${filename}.tmp" SHOW_PROGRESS) + file(RENAME "${filename}.tmp" ${filename}) + endif() + if(NOT EXISTS ${decompress_dir}) + file(MAKE_DIRECTORY ${decompress_dir}) + endif() + message("Decompress file ${filename} ...") + execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${filename} WORKING_DIRECTORY ${decompress_dir}) +endfunction() + +function(get_openvino_libs OPENVINO_RUNTIME_DIR) + set(LIB_LIST "") + find_library(OPENVINO_LIB openvino PATHS ${OPENVINO_RUNTIME_DIR}/lib/ ${OPENVINO_RUNTIME_DIR}/lib/intel64 NO_DEFAULT_PATH) + list(APPEND LIB_LIST ${OPENVINO_LIB}) + + set(TBB_DIR ${OPENVINO_RUNTIME_DIR}/3rdparty/tbb/lib/cmake) + message(STATUS "TBB_DIR: ${TBB_DIR}") + find_package(TBB PATHS ${TBB_DIR}) + if (TBB_FOUND) + # 2024.10.22(zhangyue): Use openvino with tbb on linux + set(TBB_LIB "${OPENVINO_RUNTIME_DIR}/3rdparty/tbb/lib/libtbb.so.12") + list(APPEND LIB_LIST ${TBB_LIB}) + else() + # TODO(zhoushunjie): Use openvino with tbb on linux in future. + set(OMP_LIB "${OPENVINO_RUNTIME_DIR}/3rdparty/omp/lib/libiomp5.so") + list(APPEND LIB_LIST ${OMP_LIB}) + endif() + set(OPENVINO_LIBS ${LIB_LIST} PARENT_SCOPE) +endfunction() + +function(remove_duplicate_libraries libraries) + list(LENGTH ${libraries} lib_length) + set(libraries_temp "") + set(full_libraries "") + foreach(lib_path ${${libraries}}) + get_filename_component(lib_name ${lib_path} NAME) + list(FIND libraries_temp ${lib_name} lib_idx) + if (${lib_idx} EQUAL -1) + list(APPEND libraries_temp ${lib_name}) + list(APPEND full_libraries ${lib_path}) + endif() + endforeach() + set(${libraries} ${full_libraries} PARENT_SCOPE) +endfunction() + +function(get_windows_path win_path origin_path) + STRING(REGEX REPLACE "/" "\\\\" _win_path ${origin_path}) + set(${win_path} ${_win_path} PARENT_SCOPE) +endfunction() + +function(get_osx_architecture) + if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64") + set(CURRENT_OSX_ARCH "arm64" PARENT_SCOPE) + elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64") + set(CURRENT_OSX_ARCH "x86_64" PARENT_SCOPE) + else() + set(CURRENT_OSX_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR} PARENT_SCOPE) + endif() +endfunction() + + +# A fake target to include all the libraries and tests the ultrainfer module depends. +add_custom_target(fd_compile_deps COMMAND echo 1) + +# A function to grep LINK_ONLY dependencies from INTERFACE_LINK_LIBRARIES +function(regrex_link_only_libraries OUTPUT_DEPS PUBLIC_DEPS) + string(JOIN "#" _public_deps ${PUBLIC_DEPS}) + string(REPLACE "$" "" _public_deps ${_public_deps}) + string(REPLACE "#" ";" _public_deps ${_public_deps}) + set(${OUTPUT_DEPS} ${_public_deps} PARENT_SCOPE) +endfunction() + +# Bundle several static libraries into one. This function is modified from Paddle Lite. +# reference: https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/cmake/lite.cmake#L252 +function(bundle_static_library tgt_name bundled_tgt_name fake_target) + list(APPEND static_libs ${tgt_name}) + add_dependencies(fd_compile_deps ${fake_target}) + # Set redundant static libs here, protobuf is already available + # in the Paddle Lite static library. So, we don't need protobuf + # in opencv. And there is no need for opencv_dnn, opencv_ml, + # opencv_flann and some other modules. Therefore, we chose + # to discard these redundant modules. 
+ set(REDUNDANT_STATIC_LIBS opencv_dnn opencv_calib3d opencv_photo + opencv_flann opencv_objdetect opencv_stitching opencv_gapi + opencv_ml libprotobuf) + + function(_recursively_collect_dependencies input_target) + list(FIND REDUNDANT_STATIC_LIBS ${input_target} _input_redunant_id) + if(${_input_redunant_id} GREATER 0) + return() + endif() + set(_input_link_libraries LINK_LIBRARIES) + # https://cmake.org/cmake/help/latest/prop_tgt/TYPE.html + get_target_property(_input_type ${input_target} TYPE) + # In OpenCVModules.cmake, they set the deps of modules + # (opencv_core,...) as INTERFACE_LINK_LIBRARIES. The + # 'Type' of opencv static lib is set as 'STATIC_LIBRARY'. + if ((${_input_type} STREQUAL "INTERFACE_LIBRARY") + OR (${_input_type} STREQUAL "STATIC_LIBRARY")) + set(_input_link_libraries INTERFACE_LINK_LIBRARIES) + endif() + get_target_property(_public_dependencies ${input_target} ${_input_link_libraries}) + regrex_link_only_libraries(public_dependencies "${_public_dependencies}") + + foreach(dependency IN LISTS public_dependencies) + if(TARGET ${dependency}) + get_target_property(alias ${dependency} ALIASED_TARGET) + if (TARGET ${alias}) + set(dependency ${alias}) + endif() + get_target_property(_type ${dependency} TYPE) + list(FIND REDUNDANT_STATIC_LIBS ${dependency} _deps_redunant_id) + if (${_type} STREQUAL "STATIC_LIBRARY" AND + (NOT (${_deps_redunant_id} GREATER 0))) + list(APPEND static_libs ${dependency}) + endif() + + get_property(library_already_added + GLOBAL PROPERTY _${tgt_name}_static_bundle_${dependency}) + if (NOT library_already_added) + set_property(GLOBAL PROPERTY _${tgt_name}_static_bundle_${dependency} ON) + if(NOT (${_deps_redunant_id} GREATER 0)) + _recursively_collect_dependencies(${dependency}) + endif() + endif() + endif() + endforeach() + set(static_libs ${static_libs} PARENT_SCOPE) + endfunction() + + _recursively_collect_dependencies(${tgt_name}) + + list(REMOVE_DUPLICATES static_libs) + list(REMOVE_ITEM static_libs ${REDUNDANT_STATIC_LIBS}) + message(STATUS "WITH_STATIC_LIB=${WITH_STATIC_LIB}, Found all needed static libs from dependecy tree: ${static_libs}") + message(STATUS "Exclude some redundant static libs: ${REDUNDANT_STATIC_LIBS}") + + set(bundled_tgt_full_name + ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${bundled_tgt_name}${CMAKE_STATIC_LIBRARY_SUFFIX}) + + message(STATUS "Use bundled_tgt_full_name: ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${bundled_tgt_name}${CMAKE_STATIC_LIBRARY_SUFFIX}") + + if(WIN32) + message(FATAL_ERROR "Not support UltraInfer static lib for windows now.") + endif() + + add_custom_target(${fake_target} ALL COMMAND ${CMAKE_COMMAND} -E echo "Building fake_target ${fake_target}") + add_dependencies(${fake_target} ${tgt_name}) + # add_dependencies(${fake_target} fastdelpoy_dummy) + + if(NOT IOS AND NOT APPLE) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar.in + "CREATE ${bundled_tgt_full_name}\n" ) + + foreach(tgt IN LISTS static_libs) + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar.in + "ADDLIB $\n") + endforeach() + + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar.in "SAVE\n") + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar.in "END\n") + + file(GENERATE + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar + INPUT ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar.in) + + set(ar_tool ${CMAKE_AR}) + if (CMAKE_INTERPROCEDURAL_OPTIMIZATION) + set(ar_tool ${CMAKE_CXX_COMPILER_AR}) + endif() + message(STATUS "Found ar_tool: 
${ar_tool}") + + add_custom_command( + TARGET ${fake_target} PRE_BUILD + COMMAND rm -f ${bundled_tgt_full_name} + COMMAND ${ar_tool} -M < ${CMAKE_CURRENT_BINARY_DIR}/${bundled_tgt_name}.ar + COMMENT "Bundling ${bundled_tgt_name}" + COMMAND ${CMAKE_STRIP} --strip-unneeded ${CMAKE_CURRENT_BINARY_DIR}/lib${bundled_tgt_name}.a + COMMENT "Stripped unneeded debug symbols in ${bundled_tgt_name}" + DEPENDS ${tgt_name} + VERBATIM) + else() + foreach(lib ${static_libs}) + set(libfiles ${libfiles} $) + endforeach() + add_custom_command( + TARGET ${fake_target} PRE_BUILD + COMMAND rm -f ${bundled_tgt_full_name} + COMMAND /usr/bin/libtool -static -o ${bundled_tgt_full_name} ${libfiles} + COMMENT "Bundling ${bundled_tgt_name}" + COMMAND ${CMAKE_STRIP} -S ${CMAKE_CURRENT_BINARY_DIR}/lib${bundled_tgt_name}.a + COMMENT "Stripped unneeded debug symbols in ${bundled_tgt_name}" + DEPENDS ${tgt_name} + ) + endif() + + add_library(${bundled_tgt_name} STATIC IMPORTED GLOBAL) + set_property(TARGET ${bundled_tgt_name} PROPERTY IMPORTED_LOCATION + ${bundled_tgt_full_name}) + add_dependencies(${bundled_tgt_name} ${fake_target}) + add_dependencies(${bundled_tgt_name} ${tgt_name}) + +endfunction() diff --git a/libs/ultrainfer/cpack/debian_postinst.in b/libs/ultrainfer/cpack/debian_postinst.in new file mode 100755 index 0000000000..f4d8d2d206 --- /dev/null +++ b/libs/ultrainfer/cpack/debian_postinst.in @@ -0,0 +1,42 @@ +#!/bin/bash + +case "$1" in + configure) + PLATFORM=`uname` + ULTRAINFER_LIBRARY_PATH=@CMAKE_INSTALL_PREFIX@ + + echo "=============== Information ======================" + echo "UltraInfer Library Path: $ULTRAINFER_LIBRARY_PATH" + echo "Platform: $PLATFORM" + echo "==================================================" + + # Find all the .so files' path + ALL_SO_FILES=`find $ULTRAINFER_LIBRARY_PATH -name "*.so*"` + for SO_FILE in $ALL_SO_FILES;do + LIBS_DIRECOTRIES[${#LIBS_DIRECOTRIES[@]}]=${SO_FILE%/*} + done + + # Remove the dumplicate directories + LIBS_DIRECOTRIES=($(awk -v RS=' ' '!a[$1]++' <<< ${LIBS_DIRECOTRIES[@]})) + + IMPORT_PATH="" + for LIB_DIR in ${LIBS_DIRECOTRIES[@]};do + echo "Found Library Directory: $LIB_DIR" + echo ${LIB_DIR} >> @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf + done + + if [ ! -d /etc/ld.so.conf.d ]; then + mkdir -p /etc/ld.so.conf.d + fi + ln -sf @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf /etc/ld.so.conf.d + + ldconfig + + echo "UltraInfer is successfully installed and configured." + echo "Now please get started with UltraInfer examples at $ULTRAINFER_LIBRARY_PATH/examples." + echo "And don't forget to set ULTRAINFER_INSTALL_DIR=$ULTRAINFER_LIBRARY_PATH in cmake when building examples." + ;; + *) + echo "postinst called with unknown argument \`$1'" >&2 + exit 1 +esac diff --git a/libs/ultrainfer/cpack/debian_prerm.in b/libs/ultrainfer/cpack/debian_prerm.in new file mode 100755 index 0000000000..a3a4c1ab35 --- /dev/null +++ b/libs/ultrainfer/cpack/debian_prerm.in @@ -0,0 +1,12 @@ +#!/bin/bash + +case "$1" in + remove|deconfigure) + rm -f /etc/ld.so.conf.d/@CPACK_PACKAGE_NAME@.conf + rm @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf + rm -rf @CMAKE_INSTALL_PREFIX@/third_libs/install/tensorrt/lib/ + + ldconfig + echo "UltraInfer is going to be uninstalled." 
+ ;; +esac diff --git a/libs/ultrainfer/cpack/rpm_postinst.in b/libs/ultrainfer/cpack/rpm_postinst.in new file mode 100755 index 0000000000..d9f2989fb2 --- /dev/null +++ b/libs/ultrainfer/cpack/rpm_postinst.in @@ -0,0 +1,35 @@ +#!/bin/bash + +PLATFORM=`uname` +ULTRAINFER_LIBRARY_PATH=@CMAKE_INSTALL_PREFIX@ + +echo "=============== Information ======================" +echo "UltraInfer Library Path: $ULTRAINFER_LIBRARY_PATH" +echo "Platform: $PLATFORM" +echo "==================================================" + +# Find all the .so files' path +ALL_SO_FILES=`find $ULTRAINFER_LIBRARY_PATH -name "*.so*"` +for SO_FILE in $ALL_SO_FILES;do + LIBS_DIRECOTRIES[${#LIBS_DIRECOTRIES[@]}]=${SO_FILE%/*} +done + +# Remove the dumplicate directories +LIBS_DIRECOTRIES=($(awk -v RS=' ' '!a[$1]++' <<< ${LIBS_DIRECOTRIES[@]})) + +IMPORT_PATH="" +for LIB_DIR in ${LIBS_DIRECOTRIES[@]};do + echo "Found Library Directory: $LIB_DIR" + echo ${LIB_DIR} >> @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf +done + +if [ ! -d /etc/ld.so.conf.d ]; then + mkdir -p /etc/ld.so.conf.d +fi +ln -sf @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf /etc/ld.so.conf.d + +ldconfig + +echo "UltraInfer is successfully installed and configured." +echo "Now please get started with UltraInfer examples at $ULTRAINFER_LIBRARY_PATH/examples." +echo "And don't forget to set ULTRAINFER_INSTALL_DIR=$ULTRAINFER_LIBRARY_PATH in cmake when building examples." diff --git a/libs/ultrainfer/cpack/rpm_postrm.in b/libs/ultrainfer/cpack/rpm_postrm.in new file mode 100755 index 0000000000..8c63660390 --- /dev/null +++ b/libs/ultrainfer/cpack/rpm_postrm.in @@ -0,0 +1,8 @@ +#!/bin/bash + +rm -f /etc/ld.so.conf.d/@CPACK_PACKAGE_NAME@.conf +rm @CMAKE_INSTALL_PREFIX@/@CPACK_PACKAGE_NAME@.conf +rm -rf @CMAKE_INSTALL_PREFIX@/third_libs/install/tensorrt/lib/ + +ldconfig +echo "UltraInfer has been uninstalled." diff --git a/libs/ultrainfer/python/__init__.py b/libs/ultrainfer/python/__init__.py new file mode 100755 index 0000000000..59372f9379 --- /dev/null +++ b/libs/ultrainfer/python/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/ultrainfer/python/requirements.txt b/libs/ultrainfer/python/requirements.txt new file mode 100755 index 0000000000..02f33802f0 --- /dev/null +++ b/libs/ultrainfer/python/requirements.txt @@ -0,0 +1,15 @@ +wheel +requests +tqdm +numpy<2 +opencv-python +pyyaml +pillow<10.0.0 +pandas>=0.25.0,<=1.3.5 +pycocotools +matplotlib +chinese_calendar +joblib +scikit-image +scikit-learn>=1.3.2 +tokenizers diff --git a/libs/ultrainfer/python/scripts/__init__.py b/libs/ultrainfer/python/scripts/__init__.py new file mode 100755 index 0000000000..59372f9379 --- /dev/null +++ b/libs/ultrainfer/python/scripts/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/ultrainfer/python/scripts/build_gpu.sh b/libs/ultrainfer/python/scripts/build_gpu.sh new file mode 100755 index 0000000000..720e8a8338 --- /dev/null +++ b/libs/ultrainfer/python/scripts/build_gpu.sh @@ -0,0 +1,12 @@ +export ENABLE_ORT_BACKEND=ON +export ENABLE_OPENVINO_BACKEND=ON +export ENABLE_PADDLE_BACKEND=ON +export ENABLE_TRT_BACKEND=ON +export TRT_DIRECTORY=/ultrainfer/libs/TensorRT-8.4.1.5 +export CUDA_DIRECTORY=/usr/local/cuda +export ENABLE_VISION=ON +export WITH_GPU=ON +export CMAKE_CXX_COMPILER=/usr/local/gcc-8.2/bin/g++ + +python setup.py build +python setup.py bdist_wheel diff --git a/libs/ultrainfer/python/scripts/process_libraries.py.in b/libs/ultrainfer/python/scripts/process_libraries.py.in new file mode 100755 index 0000000000..68d66f96a4 --- /dev/null +++ b/libs/ultrainfer/python/scripts/process_libraries.py.in @@ -0,0 +1,207 @@ + +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
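+# Post-build packaging helper: the functions below locate the ultrainfer shared
+# libraries produced by the CMake build, rewrite their rpaths so the wheel is
+# relocatable (patchelf on Linux, install_name_tool on macOS, DLL copying on
+# Windows), and return the package_data list consumed by setup.py, excluding
+# the bulky TensorRT libraries.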
+ +import os +import sys +import shutil +import subprocess +import platform + +user_specified_dirs = ['@OPENCV_DIRECTORY@', '@ORT_DIRECTORY@', ] +PACKAGE_NAME = os.getenv("PACKAGE_NAME", "ultrainfer") +PY_PACKAGE_NAME = PACKAGE_NAME + "_main" + + +def process_on_linux(current_dir): + rpaths = ["$ORIGIN:$ORIGIN/libs"] + fd_libs = list() + libs_path = os.path.join(current_dir, PACKAGE_NAME, "libs") + for f in os.listdir(libs_path): + filename = os.path.join(libs_path, f) + if not os.path.isfile(filename): + continue + if f.count(PACKAGE_NAME) and f.count(".so") > 0: + fd_libs.append(filename) + + cmake_build_dir = os.path.join(current_dir, ".setuptools-cmake-build") + patchelf_bin_path = os.path.join(cmake_build_dir, "third_libs/patchelf/bin/patchelf") + if not os.path.exists(patchelf_bin_path): + patchelf_bin_path = "patchelf" + + third_libs_path = os.path.join(libs_path, "third_libs") + + # remove some unneeded opencv files from the python wheel to decrease package size + if os.path.exists(os.path.join(third_libs_path, "opencv")): + for root, dirs, files in os.walk(os.path.join(third_libs_path, "opencv")): + for f in files: + items = f.strip().split('.') + if len(items) != 4: + os.remove(os.path.join(root, f)) + continue + if items[0].strip() not in ["libopencv_highgui", "libopencv_video", "libopencv_videoio", "libopencv_imgcodecs", "libopencv_imgproc", "libopencv_core", "libopencv_calib3d", "libopencv_features2d", "libopencv_flann"]: + os.remove(os.path.join(root, f)) + + all_libs_paths = [third_libs_path] + user_specified_dirs + for path in all_libs_paths: + for root, dirs, files in os.walk(path): + for d in dirs: + if d not in ["lib", "lib64"]: + continue + rel_path = os.path.relpath(os.path.join(root, d), libs_path) + if path in user_specified_dirs: + # Note(zhoushunjie): Use the absolute path for user_specified_dirs + rpath = os.path.join(root, d) + else: + rpath = "$ORIGIN/" + rel_path + rpaths.append(rpath) + for lib in fd_libs: + command = "{} --set-rpath '{}' {}".format(patchelf_bin_path, ":".join(rpaths), lib) + if platform.machine() != 'sw_64' and platform.machine() != 'mips64': + # Use call() so the exit status is actually checked; Popen returns a process object, not a return code. + assert subprocess.call( + command, + shell=True) == 0, "patchelf failed for {}, the command: {}".format( + lib, command) + + +def process_on_mac(current_dir): + fd_libs = list() + libs_path = os.path.join(current_dir, PACKAGE_NAME, "libs") + cmake_build_dir = os.path.join(current_dir, ".setuptools-cmake-build") + for f in os.listdir(libs_path): + filename = os.path.join(libs_path, f) + if not os.path.isfile(filename): + continue + if f.count(PACKAGE_NAME) > 0 and (f.count(".dylib") > 0 or + f.count(".so") > 0): + fd_libs.append(filename) + + commands = list() + pre_commands = list() + for lib in fd_libs: + if lib.count(PY_PACKAGE_NAME) > 0: + pre_commands.append( + "install_name_tool -delete_rpath {} ".format(cmake_build_dir) + lib) + commands.append("install_name_tool -id @loader_path " + lib) + commands.append("install_name_tool -add_rpath @loader_path " + lib) + + third_libs_path = os.path.join(libs_path, "third_libs") + cmake_third_libs_path = os.path.join(cmake_build_dir, "third_libs", "install") + all_libs_paths = [cmake_third_libs_path] + user_specified_dirs + for path in all_libs_paths: + for root, dirs, files in os.walk(path): + for d in dirs: + if d not in ["lib", "lib64"]: + continue + rel_path = os.path.relpath(os.path.join(root, d), cmake_third_libs_path) + if path in user_specified_dirs: + # Note(zhoushunjie): Use the absolute path for user_specified_dirs + need_delete_rpath = os.path.join(root, d)
+ need_add_rpath = os.path.join(root, d) + else: + need_delete_rpath = os.path.join(root, d) + need_add_rpath = "@loader_path/third_libs/" + rel_path + for lib in fd_libs: + if lib.count(PY_PACKAGE_NAME) > 0: + pre_commands.append( + "install_name_tool -delete_rpath {} {}".format(need_delete_rpath, lib)) + commands.append( + "install_name_tool -add_rpath {} {}".format(need_add_rpath, lib)) + + for command in pre_commands: + try: + os.system(command) + except: + print("Skip execute command: " + command) + + for command in commands: + assert os.system( + command) == 0, "command execute failed! command: {}".format( + command) + +def process_on_windows(current_dir): + libs_path = os.path.join(current_dir, PACKAGE_NAME, "libs") + third_libs_path = os.path.join(libs_path, "third_libs") + for root, dirs, files in os.walk(third_libs_path): + for f in files: + file_path = os.path.join(root, f) + if f.count('onnxruntime') > 0 and f.endswith('.dll'): + shutil.copy(file_path, libs_path) + + +def get_all_files(dirname): + files = list() + for root, dirs, filenames in os.walk(dirname): + for f in filenames: + fullname = os.path.join(root, f) + files.append(fullname) + return files + + +def process_libraries(current_dir): + if platform.system().lower() == "linux": + process_on_linux(current_dir) + elif platform.system().lower() == "darwin": + process_on_mac(current_dir) + elif platform.system().lower() == "windows": + process_on_windows(current_dir) + + all_files = get_all_files(os.path.join(current_dir, PACKAGE_NAME, "libs")) + package_data = list() + + if platform.system().lower() == "windows": + + def check_windows_legal_file(f): + # Note(zhoushunjie): Special case for some library + # File 'plugins.xml' is special case of openvino. + for special_file in ['plugins.xml']: + if special_file in f: + return True + return False + + for f in all_files: + if f.endswith(".pyd") or f.endswith("lib") or f.endswith( + "dll") or check_windows_legal_file(f): + package_data.append( + os.path.relpath(f, os.path.join(current_dir, + PACKAGE_NAME))) + + return package_data + + filters = [".vcxproj", ".png", ".java", ".h", ".cc", ".cpp", ".hpp"] + for f in all_files: + remain = True + for flt in filters: + if f.count(flt) > 0: + remain = False + filename = os.path.split(f)[-1] +# Note(zhoushunjie): To add the trt libs below will increase the size of whl package by 450M. + if filename in [ + "libnvinfer_plugin.so", + "libnvinfer.so", "libnvonnxparser.so", + "libnvparsers.so", "libnvcaffe_parser.so" + ]: + continue + + for lib_prefix in ["libnvinfer_plugin.so.8.", + "libnvinfer.so.8.", "libnvonnxparser.so.8.", + "libnvparsers.so.8.", "libnvcaffe_parser.so.8."]: + if filename.startswith(lib_prefix): + remain = False + break + + if remain: + package_data.append( + os.path.relpath(f, os.path.join(current_dir, PACKAGE_NAME))) + return package_data diff --git a/libs/ultrainfer/python/setup.py b/libs/ultrainfer/python/setup.py new file mode 100755 index 0000000000..b456b95c92 --- /dev/null +++ b/libs/ultrainfer/python/setup.py @@ -0,0 +1,485 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file refered to github.com/onnx/onnx.git + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +import shutil +import os + +TOP_DIR = os.path.realpath(os.path.dirname(__file__)) +TOP_DIR = os.path.split(TOP_DIR)[0] +PACKAGE_NAME = os.getenv("PACKAGE_NAME", "ultrainfer") +wheel_name = os.getenv("WHEEL_NAME", "ultrainfer-python") + +if not os.path.exists(PACKAGE_NAME): + shutil.copytree("ultrainfer", PACKAGE_NAME) + +from distutils.spawn import find_executable +from distutils import sysconfig, log +import setuptools +import setuptools.command.build_py +import setuptools.command.develop +import setuptools.command.build_ext + +from collections import namedtuple +from contextlib import contextmanager +import glob +import shlex +import subprocess +import sys +import platform +from textwrap import dedent +import multiprocessing + +with open(os.path.join(TOP_DIR, "python", "requirements.txt")) as fin: + REQUIRED_PACKAGES = fin.read() + +if os.getenv("BUILD_ON_CPU", "OFF") == "ON": + os.environ["ENABLE_PADDLE_BACKEND"] = "ON" + os.environ["ENABLE_ORT_BACKEND"] = "ON" + os.environ["ENABLE_OPENVINO_BACKEND"] = "ON" + os.environ["ENABLE_VISION"] = "ON" + os.environ["ENABLE_TEXT"] = "ON" + os.environ["WITH_GPU"] = "OFF" + +setup_configs = dict() +setup_configs["LIBRARY_NAME"] = PACKAGE_NAME +setup_configs["PY_LIBRARY_NAME"] = PACKAGE_NAME + "_main" +# Backend options +setup_configs["ENABLE_TVM_BACKEND"] = os.getenv("ENABLE_TVM_BACKEND", "OFF") +setup_configs["ENABLE_RKNPU2_BACKEND"] = os.getenv("ENABLE_RKNPU2_BACKEND", "OFF") +setup_configs["ENABLE_SOPHGO_BACKEND"] = os.getenv("ENABLE_SOPHGO_BACKEND", "OFF") +setup_configs["ENABLE_ORT_BACKEND"] = os.getenv("ENABLE_ORT_BACKEND", "OFF") +setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND", "OFF") +setup_configs["ENABLE_PADDLE_BACKEND"] = os.getenv("ENABLE_PADDLE_BACKEND", "OFF") +setup_configs["ENABLE_POROS_BACKEND"] = os.getenv("ENABLE_POROS_BACKEND", "OFF") +setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF") +setup_configs["ENABLE_LITE_BACKEND"] = os.getenv("ENABLE_LITE_BACKEND", "OFF") +setup_configs["ENABLE_PADDLE2ONNX"] = os.getenv("ENABLE_PADDLE2ONNX", "OFF") +setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "OFF") +setup_configs["ENABLE_FLYCV"] = os.getenv("ENABLE_FLYCV", "OFF") +setup_configs["ENABLE_CVCUDA"] = os.getenv("ENABLE_CVCUDA", "OFF") +setup_configs["ENABLE_TEXT"] = os.getenv("ENABLE_TEXT", "OFF") +setup_configs["ENABLE_BENCHMARK"] = os.getenv("ENABLE_BENCHMARK", "OFF") +# Hardware options +setup_configs["WITH_GPU"] = os.getenv("WITH_GPU", "OFF") +setup_configs["WITH_IPU"] = os.getenv("WITH_IPU", "OFF") +setup_configs["WITH_OPENCL"] = os.getenv("WITH_OPENCL", "OFF") +setup_configs["WITH_TIMVX"] = os.getenv("WITH_TIMVX", "OFF") +setup_configs["WITH_DIRECTML"] = os.getenv("WITH_DIRECTML", "OFF") +setup_configs["WITH_ASCEND"] = os.getenv("WITH_ASCEND", "OFF") +setup_configs["WITH_KUNLUNXIN"] = os.getenv("WITH_KUNLUNXIN", "OFF") +setup_configs["RKNN2_TARGET_SOC"] 
= os.getenv("RKNN2_TARGET_SOC", "") +# Custom deps settings +setup_configs["TRT_DIRECTORY"] = os.getenv("TRT_DIRECTORY", "UNDEFINED") +setup_configs["CUDA_DIRECTORY"] = os.getenv("CUDA_DIRECTORY", "/usr/local/cuda") +setup_configs["OPENCV_DIRECTORY"] = os.getenv("OPENCV_DIRECTORY", "") +setup_configs["ORT_DIRECTORY"] = os.getenv("ORT_DIRECTORY", "") +setup_configs["OPENVINO_DIRECTORY"] = os.getenv("OPENVINO_DIRECTORY", "") +setup_configs["PADDLEINFERENCE_DIRECTORY"] = os.getenv("PADDLEINFERENCE_DIRECTORY", "") +setup_configs["PADDLEINFERENCE_VERSION"] = os.getenv("PADDLEINFERENCE_VERSION", "") +setup_configs["PADDLEINFERENCE_URL"] = os.getenv("PADDLEINFERENCE_URL", "") +setup_configs["PADDLEINFERENCE_API_COMPAT_2_4_x"] = os.getenv( + "PADDLEINFERENCE_API_COMPAT_2_4_x", "OFF" +) +setup_configs["PADDLEINFERENCE_API_COMPAT_2_5_x"] = os.getenv( + "PADDLEINFERENCE_API_COMPAT_2_5_x", "OFF" +) +setup_configs["PADDLEINFERENCE_API_COMPAT_2_6_x"] = os.getenv( + "PADDLEINFERENCE_API_COMPAT_2_6_x", "OFF" +) +setup_configs["PADDLEINFERENCE_API_COMPAT_DEV"] = os.getenv( + "PADDLEINFERENCE_API_COMPAT_DEV", "OFF" +) +setup_configs["PADDLEINFERENCE_API_CUSTOM_OP"] = os.getenv( + "PADDLEINFERENCE_API_CUSTOM_OP", "OFF" +) +setup_configs["PADDLE2ONNX_URL"] = os.getenv("PADDLE2ONNX_URL", "") +setup_configs["PADDLELITE_URL"] = os.getenv("PADDLELITE_URL", "") + +# Other settings +setup_configs["BUILD_ON_JETSON"] = os.getenv("BUILD_ON_JETSON", "OFF") +setup_configs["BUILD_PADDLE2ONNX"] = os.getenv("BUILD_PADDLE2ONNX", "OFF") + +if setup_configs["RKNN2_TARGET_SOC"] != "" or setup_configs["BUILD_ON_JETSON"] != "OFF": + REQUIRED_PACKAGES = REQUIRED_PACKAGES.replace("opencv-python", "") + +if wheel_name == "ultrainfer-python": + if setup_configs["WITH_GPU"] == "ON" or setup_configs["BUILD_ON_JETSON"] == "ON": + wheel_name = "ultrainfer-gpu-python" + elif setup_configs["WITH_IPU"] == "ON": + wheel_name = "ultrainfer-ipu-python" + +if os.getenv("CMAKE_CXX_COMPILER", None) is not None: + setup_configs["CMAKE_CXX_COMPILER"] = os.getenv("CMAKE_CXX_COMPILER") + +SRC_DIR = os.path.join(TOP_DIR, PACKAGE_NAME) +PYTHON_SRC_DIR = os.path.join(TOP_DIR, "python", PACKAGE_NAME) +CMAKE_BUILD_DIR = os.path.join(TOP_DIR, "python", ".setuptools-cmake-build") + +WINDOWS = os.name == "nt" + +CMAKE = find_executable("cmake3") or find_executable("cmake") +MAKE = find_executable("make") + +setup_requires = [] +extras_require = {} + +################################################################################ +# Global variables for controlling the build variant +################################################################################ + +# Default value is set to TRUE\1 to keep the settings same as the current ones. +# However going forward the recomemded way to is to set this to False\0 +USE_MSVC_STATIC_RUNTIME = bool(os.getenv("USE_MSVC_STATIC_RUNTIME", "1") == "1") +ONNX_NAMESPACE = os.getenv("ONNX_NAMESPACE", "paddle2onnx") +################################################################################ +# Version +################################################################################ + +try: + git_version = ( + subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=TOP_DIR) + .decode("ascii") + .strip() + ) +except (OSError, subprocess.CalledProcessError): + git_version = None + +extra_version_info = "" +if setup_configs["PADDLEINFERENCE_VERSION"] != "": + extra_version_info += "." 
+ setup_configs["PADDLEINFERENCE_VERSION"] + +with open(os.path.join(TOP_DIR, "VERSION_NUMBER")) as version_file: + VersionInfo = namedtuple( + "VersionInfo", + [ + "version", + "git_version", + "extra_version_info", + "enable_trt_backend", + "enable_paddle_backend", + "with_gpu", + ], + )( + version=version_file.read().strip(), + git_version=git_version, + extra_version_info=extra_version_info.strip("."), + enable_trt_backend=setup_configs["ENABLE_TRT_BACKEND"], + enable_paddle_backend=setup_configs["ENABLE_PADDLE_BACKEND"], + with_gpu=setup_configs["WITH_GPU"], + ) + +################################################################################ +# Pre Check +################################################################################ + +assert CMAKE, 'Could not find "cmake" executable!' + +################################################################################ +# Utilities +################################################################################ + + +@contextmanager +def cd(path): + if not os.path.isabs(path): + raise RuntimeError("Can only cd to absolute path, got: {}".format(path)) + orig_path = os.getcwd() + os.chdir(path) + try: + yield + finally: + os.chdir(orig_path) + + +################################################################################ +# Customized commands +################################################################################ + + +class NoOptionCommand(setuptools.Command): + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + +def get_all_files(dirname): + files = list() + for root, dirs, filenames in os.walk(dirname): + for f in filenames: + fullname = os.path.join(root, f) + files.append(fullname) + return files + + +class create_version(NoOptionCommand): + def run(self): + with open(os.path.join(PYTHON_SRC_DIR, "code_version.py"), "w") as f: + f.write( + dedent( + """\ + # This file is generated by setup.py. DO NOT EDIT! + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals + version = '{version}' + git_version = '{git_version}' + extra_version_info = '{extra_version_info}' + enable_trt_backend = '{enable_trt_backend}' + enable_paddle_backend = '{enable_paddle_backend}' + with_gpu = '{with_gpu}' + """.format( + **dict(VersionInfo._asdict()) + ) + ) + ) + + +class cmake_build(setuptools.Command): + """ + Compiles everything when `python setupmnm.py build` is run using cmake. + Custom args can be passed to cmake by specifying the `CMAKE_ARGS` + environment variable. + The number of CPUs used by `make` can be specified by passing `-j` + to `setup.py build`. By default all CPUs are used. 
+ """ + + user_options = [ + (str("jobs="), str("j"), str("Specifies the number of jobs to use with make")) + ] + + built = False + + def initialize_options(self): + self.jobs = None + + def finalize_options(self): + if sys.version_info[0] >= 3: + self.set_undefined_options("build", ("parallel", "jobs")) + if self.jobs is None and os.getenv("MAX_JOBS") is not None: + self.jobs = os.getenv("MAX_JOBS") + self.jobs = multiprocessing.cpu_count() if self.jobs is None else int(self.jobs) + + def run(self): + if cmake_build.built: + return + cmake_build.built = True + if not os.path.exists(CMAKE_BUILD_DIR): + os.makedirs(CMAKE_BUILD_DIR) + + with cd(CMAKE_BUILD_DIR): + build_type = "Release" + # configure + cmake_args = [ + CMAKE, + "-DPYTHON_INCLUDE_DIR={}".format(sysconfig.get_python_inc()), + "-DPYTHON_EXECUTABLE={}".format(sys.executable), + "-DBUILD_ULTRAINFER_PYTHON=ON", + "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", + "-DONNX_NAMESPACE={}".format(ONNX_NAMESPACE), + "-DPY_EXT_SUFFIX={}".format( + sysconfig.get_config_var("EXT_SUFFIX") or "" + ), + ] + cmake_args.append("-DCMAKE_BUILD_TYPE=%s" % build_type) + for k, v in setup_configs.items(): + cmake_args.append("-D{}={}".format(k, v)) + if WINDOWS: + cmake_args.extend( + [ + # we need to link with libpython on windows, so + # passing python version to window in order to + # find python in cmake + "-DPY_VERSION={}".format( + "{0}.{1}".format(*sys.version_info[:2]) + ), + ] + ) + if platform.architecture()[0] == "64bit": + cmake_args.extend(["-A", "x64", "-T", "host=x64"]) + else: + cmake_args.extend(["-A", "Win32", "-T", "host=x86"]) + if "CMAKE_ARGS" in os.environ: + extra_cmake_args = shlex.split(os.environ["CMAKE_ARGS"]) + # prevent crossfire with downstream scripts + del os.environ["CMAKE_ARGS"] + log.info("Extra cmake args: {}".format(extra_cmake_args)) + cmake_args.extend(extra_cmake_args) + cmake_args.append(TOP_DIR) + subprocess.check_call(cmake_args) + + build_args = [CMAKE, "--build", os.curdir] + if WINDOWS: + build_args.extend(["--config", build_type]) + build_args.extend(["--", "/maxcpucount:{}".format(self.jobs)]) + else: + build_args.extend(["--", "-j", str(self.jobs)]) + subprocess.check_call(build_args) + + +class build_py(setuptools.command.build_py.build_py): + def run(self): + self.run_command("create_version") + self.run_command("cmake_build") + + generated_python_files = glob.glob( + os.path.join(CMAKE_BUILD_DIR, PACKAGE_NAME, "*.py") + ) + glob.glob(os.path.join(CMAKE_BUILD_DIR, PACKAGE_NAME, "*.pyi")) + + for src in generated_python_files: + dst = os.path.join(TOP_DIR, os.path.relpath(src, CMAKE_BUILD_DIR)) + self.copy_file(src, dst) + + return setuptools.command.build_py.build_py.run(self) + + +class develop(setuptools.command.develop.develop): + def run(self): + self.run_command("build_py") + setuptools.command.develop.develop.run(self) + + +class build_ext(setuptools.command.build_ext.build_ext): + def run(self): + self.run_command("cmake_build") + setuptools.command.build_ext.build_ext.run(self) + + def build_extensions(self): + for ext in self.extensions: + fullname = self.get_ext_fullname(ext.name) + filename = os.path.basename(self.get_ext_filename(fullname)) + + lib_path = CMAKE_BUILD_DIR + if os.name == "nt": + debug_lib_dir = os.path.join(lib_path, "Debug") + release_lib_dir = os.path.join(lib_path, "Release") + if os.path.exists(debug_lib_dir): + lib_path = debug_lib_dir + elif os.path.exists(release_lib_dir): + lib_path = release_lib_dir + src = os.path.join(lib_path, filename) + dst = 
os.path.join(os.path.realpath(self.build_lib), PACKAGE_NAME, filename) + self.copy_file(src, dst) + + +cmdclass = { + "create_version": create_version, + "cmake_build": cmake_build, + "build_py": build_py, + "develop": develop, + "build_ext": build_ext, +} + +################################################################################ +# Extensions +################################################################################ + +ext_modules = [ + setuptools.Extension( + name=str(PACKAGE_NAME + "." + setup_configs["PY_LIBRARY_NAME"]), sources=[] + ), +] + +################################################################################ +# Packages +################################################################################ + +# no need to do fancy stuff so far +if PACKAGE_NAME != "ultrainfer": + packages = setuptools.find_packages(exclude=["ultrainfer*", "scripts"]) +else: + packages = setuptools.find_packages(exclude=["xencrypt*", "scripts"]) + +################################################################################ +# Test +################################################################################ + +if sys.version_info[0] == 3: + # Mypy doesn't work with Python 2 + extras_require["mypy"] = ["mypy==0.600"] + +################################################################################ +# Final +################################################################################ + +package_data = {PACKAGE_NAME: ["LICENSE", "ThirdPartyNotices.txt"]} + +if sys.argv[1] == "install" or sys.argv[1] == "bdist_wheel": + shutil.copy( + os.path.join(TOP_DIR, "ThirdPartyNotices.txt"), + os.path.join(TOP_DIR, "python", PACKAGE_NAME), + ) + shutil.copy( + os.path.join(TOP_DIR, "LICENSE"), os.path.join(TOP_DIR, "python", PACKAGE_NAME) + ) + if not os.path.exists( + os.path.join(TOP_DIR, "python", PACKAGE_NAME, "libs", "third_libs") + ): + print( + f"Didn't detect path: {PACKAGE_NAME}/libs/third_libs exist, please execute `python setup.py build` first" + ) + sys.exit(0) + from scripts.process_libraries import process_libraries + + all_lib_data = process_libraries(os.path.split(os.path.abspath(__file__))[0]) + package_data[PACKAGE_NAME].extend(all_lib_data) + setuptools.setup( + name=wheel_name, + version=VersionInfo.version + extra_version_info, + ext_modules=ext_modules, + description="Deploy Kit Tool For Deeplearning models.", + packages=packages, + package_data=package_data, + include_package_data=True, + setup_requires=setup_requires, + extras_require=extras_require, + author="ultrainfer", + install_requires=REQUIRED_PACKAGES, + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + ], + license="Apache 2.0", + ) +else: + setuptools.setup( + name=wheel_name, + version=VersionInfo.version + extra_version_info, + description="Deploy Kit Tool For Deeplearning models.", + ext_modules=ext_modules, + cmdclass=cmdclass, + packages=packages, + package_data=package_data, + include_package_data=False, + setup_requires=setup_requires, + extras_require=extras_require, + author="ultrainfer", + install_requires=REQUIRED_PACKAGES, + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + ], + license="Apache 2.0", + ) diff --git a/libs/ultrainfer/python/ultrainfer/__init__.py b/libs/ultrainfer/python/ultrainfer/__init__.py new file mode 100755 index 0000000000..12bc5a7236 --- /dev/null +++ 
b/libs/ultrainfer/python/ultrainfer/__init__.py @@ -0,0 +1,186 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +import os +import sys +import platform + +# Create a symbol link to tensorrt library. +trt_directory = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "libs/third_libs/tensorrt/lib/" +) +if os.name != "nt" and os.path.exists(trt_directory): + logging.basicConfig(level=logging.INFO) + for trt_lib in [ + "libnvcaffe_parser.so", + "libnvinfer_plugin.so", + "libnvinfer.so", + "libnvonnxparser.so", + "libnvparsers.so", + ]: + dst = os.path.join(trt_directory, trt_lib) + src = os.path.join(trt_directory, trt_lib + ".8") + if not os.path.exists(dst): + try: + os.symlink(src, dst) + logging.info(f"Create a symbolic link pointing to {src} named {dst}.") + except OSError as e: + logging.warning( + f"Failed to create a symbolic link pointing to {src} by an unprivileged user. " + "It may failed when you use Paddle TensorRT backend. " + "Please use administator privilege to import ultrainfer at first time." + ) + break + + # HACK: Reset the root logger config that got messed up by FD. + root_logger = logging.getLogger() + root_logger.level = logging.WARNING + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + +from .code_version import version, git_version, extra_version_info +from .code_version import enable_trt_backend, enable_paddle_backend, with_gpu + +# Note(zhoushunjie): Fix the import order of paddle and ultrainfer library. +# This solution will be removed it when the confilct of paddle and +# ultrainfer is fixed. + +# Note(qiuyanjun): Add backward compatible for paddle 2.4.x +sys_platform = platform.platform().lower() + + +def get_paddle_version(): + paddle_version = "" + try: + import pkg_resources + + paddle_version = pkg_resources.require("paddlepaddle-gpu")[0].version.split( + ".post" + )[0] + except: + try: + paddle_version = pkg_resources.require("paddlepaddle")[0].version.split( + ".post" + )[0] + except: + pass + return paddle_version + + +def should_import_paddle(): + if ("paddle2.4" in extra_version_info) or ("post24" in extra_version_info): + paddle_version = get_paddle_version() + if ( + paddle_version != "" + and paddle_version <= "2.4.2" + and paddle_version != "0.0.0" + ): + return True + return False + + +def should_set_tensorrt(): + if ( + with_gpu == "ON" + and enable_paddle_backend == "ON" + and enable_trt_backend == "ON" + ): + return True + return False + + +def tensorrt_is_avaliable(): + # Note(qiuyanjun): Only support linux now. + found_trt_lib = False + if ("linux" in sys_platform) and ("LD_LIBRARY_PATH" in os.environ.keys()): + for lib_path in os.environ["LD_LIBRARY_PATH"].split(":"): + if os.path.exists(os.path.join(lib_path, "libnvinfer.so")): + found_trt_lib = True + break + return found_trt_lib + + +try: + # windows: no conflict between ultrainfer and paddle. 
+ # linux: must import paddle first to solve the conflict. + # macos: still can not solve the conflict between ultrainfer and paddle, + # due to the global flags redefined in paddle/paddle_inference so. + # we got the error (ERROR: flag 'xxx' was defined more than once). + if "linux" in sys_platform: + if should_import_paddle(): + import paddle # need import paddle first for paddle2.4.x + + # check whether tensorrt in LD_LIBRARY_PATH for ultrainfer + if should_set_tensorrt() and (not tensorrt_is_avaliable()): + if os.path.exists(trt_directory): + logging.info( + "\n[WARNING] Can not find TensorRT lib in LD_LIBRARY_PATH for UltraInfer! \ + \n[WARNING] Please export [ YOUR CUSTOM TensorRT ] lib path to LD_LIBRARY_PATH first, or run the command: \ + \n[WARNING] Linux: 'export LD_LIBRARY_PATH=$(python -c 'from ultrainfer import trt_directory; print(trt_directory)'):$LD_LIBRARY_PATH'" + ) + else: + logging.info( + "\n[WARNING] Can not find TensorRT lib in LD_LIBRARY_PATH for UltraInfer! \ + \n[WARNING] Please export [YOUR CUSTOM TensorRT] lib path to LD_LIBRARY_PATH first." + ) +except: + pass + + +os.environ["FLAGS_enable_pir_api"] = "0" +logging.warning( + "Please note that we have set the environment variable \ +'FLAGS_enable_pir_api' to 'False' to ensure the correct operation of the Paddle backend." +) + + +from .c_lib_wrap import ( + ModelFormat, + Backend, + FDDataType, + TensorInfo, + Device, + is_built_with_gpu, + is_built_with_ort, + ModelFormat, + is_built_with_paddle, + is_built_with_trt, + get_default_cuda_directory, +) + + +def set_logger(enable_info=True, enable_warning=True): + """Set behaviour of logger while using UltraInfer + + :param enable_info: (boolean)Whether to print out log level of INFO + :param enable_warning: (boolean)Whether to print out log level of WARNING, recommend to set to True + """ + from .c_lib_wrap import set_logger + + set_logger(enable_info, enable_warning) + + +from .runtime import Runtime, RuntimeOption +from .model import UltraInferModel +from . import c_lib_wrap as C +from . import vision +from . import pipeline +from . import text +from . import ts +from .download import download, download_and_decompress, download_model, get_model_list + + +__version__ = version diff --git a/libs/ultrainfer/python/ultrainfer/c_lib_wrap.py.in b/libs/ultrainfer/python/ultrainfer/c_lib_wrap.py.in new file mode 100755 index 0000000000..248f2b66af --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/c_lib_wrap.py.in @@ -0,0 +1,190 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
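+# A minimal usage sketch for the public API re-exported by ultrainfer/__init__.py
+# above (assumes an installed wheel and network access; the model name below is a
+# placeholder, not a real entry):
+#
+#     import ultrainfer as ui
+#     ui.set_logger(enable_info=False, enable_warning=True)   # silence INFO logs
+#     print(ui.__version__)
+#     print(ui.get_model_list())                # downloadable models per category
+#     ui.download_model(name="<model-name>")    # saved under hubenv.MODEL_HOME by default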
+from __future__ import absolute_import +import logging +import os +import sys + +user_specified_dirs = ['@OPENCV_DIRECTORY@', '@ORT_DIRECTORY@', ] + + +def is_built_with_gpu() -> bool: + return True if "@WITH_GPU@" == "ON" else False + + +def is_built_with_ort() -> bool: + return True if "@ENABLE_ORT_BACKEND@" == "ON" else False + + +def is_built_with_trt() -> bool: + return True if "@ENABLE_TRT_BACKEND@" == "ON" else False + + +def is_built_with_paddle() -> bool: + return True if "@ENABLE_PADDLE_BACKEND@" == "ON" else False + + +def is_built_with_poros() ->bool: + return True if "@ENABLE_POROS_BACKEND@" == "ON" else False + + +def is_built_with_openvino() ->bool: + return True if "@ENABLE_OPENVINO_BACKEND@" == "ON" else False + + +def get_default_cuda_directory() -> str: + if not is_built_with_gpu(): + return "" + return r"@CUDA_DIRECTORY@".strip() + + +def get_default_cuda_major_version() -> str: + if not is_built_with_gpu(): + return "" + # TODO(qiuyanjun): get cuda version from cmake. + return "11" + + +def find_cudart(search_dir: str) -> bool: + if search_dir is None: + logging.info("[UltraInfer][ERROR]: search_dir can not be NoneTpye.") + return False + # TODO(qiuyanjun): add Linux cudart *.so check + cudart_lib_name = f"cudart64_{get_default_cuda_major_version()}0.dll" + cudart_lib_path = os.path.join(search_dir, cudart_lib_name) + return os.path.exists(cudart_lib_path) + + +def find_cudart_from_sys() -> bool: + # TODO(qiuyanjun): add Linux system paths + sys_paths = os.environ["path"].strip().split(";") + for sys_path in sys_paths: + if find_cudart(sys_path): + logging.info(f"[UltraInfer][INFO]: Successfully found CUDA ToolKit from system PATH env -> {sys_path}") + return True + return False + + +def add_system_search_paths(): + # TODO(qiuyanjun): add Linux system paths + sys_paths = os.environ["path"].strip().split(";") + for sys_path in sys_paths: + if os.path.exists(sys_path) and sys.version_info[:2] >= (3, 8): + try: + os.add_dll_directory(sys_path) + except: + continue + + +def add_dll_search_dir(dir_path): + os.environ["path"] = dir_path + ";" + os.environ["path"] + sys.path.insert(0, dir_path) + if sys.version_info[:2] >= (3, 8): + os.add_dll_directory(dir_path) + + +def add_custom_cuda_path(): + if is_built_with_gpu(): + # if UltraInfer built with gpu and want to run + # in windows, we need to add CUDA_DIRECTORY into + # dll search paths to make sure UltraInfer.dll + # can link cudart correctly. we search the + # default path firstly and try to add into + # paths. User should set it manually if the + # cuda toolkit is not locate in the default + # path we assume. + base_url = "https://github.com/PaddlePaddle/FastDeploy/blob/" + default_cuda_dir = get_default_cuda_directory() + default_cuda_version = get_default_cuda_major_version() # 11 + cuda_shared_lib_dir = os.path.join(default_cuda_dir, "bin") + custom_cuda_envs = ["CUDA_DIRECTORY", "CUDA_HOME", "CUDA_ROOT", "CUDA_PATH"] + custom_cuda_dir = "NOTFOUNDED" + if not os.path.exists(cuda_shared_lib_dir): + # try to get cuda directory from user's local env + for custom_env in custom_cuda_envs: + custom_cuda_dir = os.getenv(custom_env, "NOTFOUNDED") + custom_cuda_dir = custom_cuda_dir.strip().split(";")[0] + if os.path.exists(custom_cuda_dir) and custom_cuda_dir != "NOTFOUNDED": + break + if not os.path.exists(custom_cuda_dir) or custom_cuda_dir == "NOTFOUNDED": + logging.warnings.warn(f"\n--- UltraInfer was built with gpu, \ + \n--- but the default cuda directory does not exists. 
\ + \n--- Please setup one of {custom_cuda_envs} manually, \ + \n--- this path should look like: {default_cuda_dir}. \ + \n--- Check FAQ: {base_url + 'develop/docs/FAQ.md'}") + return + # path to cuda dlls + cuda_shared_lib_dir = os.path.join(custom_cuda_dir, "bin") + add_dll_search_dir(cuda_shared_lib_dir) + # try pre find cudart with major version, e.g 11.x/10.x + if not find_cudart(cuda_shared_lib_dir): + custom_cuda_version = os.path.basename(custom_cuda_dir) + logging.warnings.warn( + f"\n--- UltraInfer was built with CUDA major version {default_cuda_version}, \ + \n--- but found custom CUDA version {custom_cuda_version} at {custom_cuda_dir} \ + \n--- Please setup one of {custom_cuda_envs} manually, \ + \n--- this path should look like: {default_cuda_dir}. \ + \n--- Check FAQ: {base_url + 'develop/docs/FAQ.md'}") + return + logging.info(f"[UltraInfer][INFO]: Successfully found CUDA ToolKit from -> {cuda_shared_lib_dir}") + + +if os.name == "nt": + # cuda/cudnn libs + if is_built_with_gpu(): + add_system_search_paths() + if not find_cudart_from_sys(): + add_custom_cuda_path() + + current_path = os.path.abspath(__file__) + dirname = os.path.dirname(current_path) + third_libs_dir = os.path.join(dirname, "libs") + all_dirs = user_specified_dirs + [third_libs_dir] + for dir in all_dirs: + if os.path.exists(dir): + add_dll_search_dir(dir) + for root, dirs, filenames in os.walk(dir): + for d in dirs: + if d == "lib" or d == "bin": + add_dll_search_dir(os.path.join(dirname, root, d)) + + +try: + from .libs.@PY_LIBRARY_NAME@ import * +except Exception as e: + raise RuntimeError(f"UltraInfer initalized failed! Error: {e}") + + +def TensorInfoStr(tensor_info): + message = "TensorInfo(name : '{}', dtype : '{}', shape : '{}')".format( + tensor_info.name, tensor_info.dtype, tensor_info.shape) + return message + + +def RuntimeOptionStr(runtime_option): + attrs = dir(runtime_option) + message = "RuntimeOption(\n" + for attr in attrs: + if attr.startswith("__"): + continue + if hasattr(getattr(runtime_option, attr), "__call__"): + continue + message += " {} : {}\t\n".format(attr, getattr(runtime_option, attr)) + message.strip("\n") + message += ")" + return message + + +TensorInfo.__repr__ = TensorInfoStr +RuntimeOption.__repr__ = RuntimeOptionStr diff --git a/libs/ultrainfer/python/ultrainfer/download.py b/libs/ultrainfer/python/ultrainfer/download.py new file mode 100755 index 0000000000..d458b8d40a --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/download.py @@ -0,0 +1,274 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
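+# A short sketch of how the build-introspection helpers defined above in
+# c_lib_wrap.py.in are typically consumed (results depend on the compile-time flags):
+#
+#     from ultrainfer import is_built_with_gpu, is_built_with_trt, get_default_cuda_directory
+#     if is_built_with_gpu():
+#         print("CUDA directory baked into this build:", get_default_cuda_directory())
+#     if not is_built_with_trt():
+#         print("TensorRT backend was not enabled in this build")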
+ +import os +import os.path as osp +import shutil +import requests +import time +import zipfile +import tarfile +import hashlib +import tqdm +import logging + +from .utils.hub_model_server import model_server +from .utils import hub_env as hubenv + +DOWNLOAD_RETRY_LIMIT = 3 + + +def md5check(fullname, md5sum=None): + if md5sum is None: + return True + + logging.info("File {} md5 checking...".format(fullname)) + md5 = hashlib.md5() + with open(fullname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + md5.update(chunk) + calc_md5sum = md5.hexdigest() + + if calc_md5sum != md5sum: + logging.info( + "File {} md5 check failed, {}(calc) != " + "{}(base)".format(fullname, calc_md5sum, md5sum) + ) + return False + return True + + +def move_and_merge_tree(src, dst): + """ + Move src directory to dst, if dst is already exists, + merge src to dst + """ + if not osp.exists(dst): + shutil.move(src, dst) + else: + if not osp.isdir(src): + shutil.move(src, dst) + return + for fp in os.listdir(src): + src_fp = osp.join(src, fp) + dst_fp = osp.join(dst, fp) + if osp.isdir(src_fp): + if osp.isdir(dst_fp): + move_and_merge_tree(src_fp, dst_fp) + else: + shutil.move(src_fp, dst_fp) + elif osp.isfile(src_fp) and not osp.isfile(dst_fp): + shutil.move(src_fp, dst_fp) + + +def download(url, path, rename=None, md5sum=None, show_progress=False): + """ + Download from url, save to path. + url (str): download url + path (str): download to given path + """ + if not osp.exists(path): + os.makedirs(path) + + fname = osp.split(url)[-1] + fullname = osp.join(path, fname) + if rename is not None: + fullname = osp.join(path, rename) + retry_cnt = 0 + while not (osp.exists(fullname) and md5check(fullname, md5sum)): + if retry_cnt < DOWNLOAD_RETRY_LIMIT: + retry_cnt += 1 + else: + logging.debug("{} download failed.".format(fname)) + raise RuntimeError( + "Download from {} failed. " "Retry limit reached".format(url) + ) + + logging.info("Downloading {} from {}".format(fname, url)) + + req = requests.get(url, stream=True) + if req.status_code != 200: + raise RuntimeError( + "Downloading from {} failed with code " + "{}!".format(url, req.status_code) + ) + + # For protecting download interupted, download to + # tmp_fullname firstly, move tmp_fullname to fullname + # after download finished + tmp_fullname = fullname + "_tmp" + total_size = req.headers.get("content-length") + with open(tmp_fullname, "wb") as f: + if total_size and show_progress: + for chunk in tqdm.tqdm( + req.iter_content(chunk_size=1024), + total=(int(total_size) + 1023) // 1024, + unit="KB", + ): + f.write(chunk) + else: + for chunk in req.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + shutil.move(tmp_fullname, fullname) + logging.debug("{} download completed.".format(fname)) + + return fullname + + +def decompress(fname): + """ + Decompress for zip and tar file + """ + logging.info("Decompressing {}...".format(fname)) + + # For protecting decompressing interupted, + # decompress to fpath_tmp directory firstly, if decompress + # successed, move decompress files to fpath and delete + # fpath_tmp and remove download compress file. 
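+    # Note: tar archives are extracted via safe_extract() below, which rejects
+    # members that would escape the target directory (path traversal protection).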
+ fpath = osp.split(fname)[0] + fpath_tmp = osp.join(fpath, "tmp") + if osp.isdir(fpath_tmp): + shutil.rmtree(fpath_tmp) + os.makedirs(fpath_tmp) + + if fname.find(".tar") >= 0 or fname.find(".tgz") >= 0: + with tarfile.open(fname) as tf: + + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + safe_extract(tf, path=fpath_tmp) + elif fname.find(".zip") >= 0: + with zipfile.ZipFile(fname) as zf: + zf.extractall(path=fpath_tmp) + else: + raise TypeError("Unsupport compress file type {}".format(fname)) + + for f in os.listdir(fpath_tmp): + src_dir = osp.join(fpath_tmp, f) + dst_dir = osp.join(fpath, f) + move_and_merge_tree(src_dir, dst_dir) + + shutil.rmtree(fpath_tmp) + logging.debug("{} decompressed.".format(fname)) + return dst_dir + + +def url2dir(url, path, rename=None): + full_name = download(url, path, rename, show_progress=True) + print("File is donwloaded, now extracting...") + if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count("zip") > 0: + return decompress(full_name) + + +def download_and_decompress(url, path=".", rename=None): + fname = osp.split(url)[-1] + fullname = osp.join(path, fname) + # if url.endswith(('tgz', 'tar.gz', 'tar', 'zip')): + # fullname = osp.join(path, fname.split('.')[0]) + nranks = 0 + if nranks <= 1: + dst_dir = url2dir(url, path, rename) + if dst_dir is not None: + fullname = dst_dir + else: + lock_path = fullname + ".lock" + if not os.path.exists(fullname): + with open(lock_path, "w"): + os.utime(lock_path, None) + if nranks == 0: + dst_dir = url2dir(url, path, rename) + if dst_dir is not None: + fullname = dst_dir + os.remove(lock_path) + else: + while os.path.exists(lock_path): + time.sleep(1) + return + + +def get_model_list(category: str = None): + """ + Get all pre-trained models information supported by fd.download_model. + Args: + category(str): model category, if None, list all models in all categories. + Returns: + results(dict): a dictionary, key is category, value is a list which contains models information. + """ + result = model_server.get_model_list() + if result["status"] != 0: + raise ValueError( + "Failed to get pretrained models information from hub model server." + ) + result = result["data"] + if category is None: + return result + elif category in result: + return {category: result[category]} + else: + raise ValueError( + "No pretrained model in category {} can be downloaded now.".format(category) + ) + + +def download_model( + name: str, path: str = None, format: str = None, version: str = None +): + """ + Download pre-trained model for UltraInfer inference engine. + Args: + name: model name + path(str): local path for saving model. 
If not set, default is hubenv.MODEL_HOME + format(str): UltraInfer model format + version(str) : UltraInfer model version + """ + result = model_server.search_model(name, format, version) + if path is None: + path = hubenv.MODEL_HOME + if result: + url = result[0]["url"] + format = result[0]["format"] + version = result[0]["version"] + fullpath = download(url, path, show_progress=True) + model_server.stat_model(name, format, version) + if format == "paddle": + if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count("zip") > 0: + archive_path = fullpath + fullpath = decompress(fullpath) + try: + os.rename(fullpath, os.path.join(os.path.dirname(fullpath), name)) + fullpath = os.path.join(os.path.dirname(fullpath), name) + os.remove(archive_path) + except FileExistsError: + pass + print("Successfully download model at path: {}".format(fullpath)) + return fullpath + else: + print("ERROR: Could not find a model named {}".format(name)) diff --git a/libs/ultrainfer/python/ultrainfer/model.py b/libs/ultrainfer/python/ultrainfer/model.py new file mode 100755 index 0000000000..3166abd6e9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/model.py @@ -0,0 +1,88 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +import abc +import logging +from . 
import c_lib_wrap as C + + +class BaseUltraInferModel(metaclass=abc.ABCMeta): + @abc.abstractmethod + def model_name(self): + raise NotImplementedError + + @abc.abstractmethod + def num_inputs_of_runtime(self): + raise NotImplementedError + + @abc.abstractmethod + def num_outputs_of_runtime(self): + raise NotImplementedError + + +class UltraInferModel(BaseUltraInferModel): + def __init__(self, option): + self._model = None + if option is None: + self._runtime_option = C.RuntimeOption() + else: + self._runtime_option = option._option + + def model_name(self): + return self._model.model_name() + + def num_inputs_of_runtime(self): + return self._model.num_inputs_of_runtime() + + def num_outputs_of_runtime(self): + return self._model.num_outputs_of_runtime() + + def input_info_of_runtime(self, index): + assert ( + index < self.num_inputs_of_runtime() + ), "The index:{} must be less than number of inputs:{}.".format( + index, self.num_inputs_of_runtime() + ) + return self._model.input_info_of_runtime(index) + + def output_info_of_runtime(self, index): + assert ( + index < self.num_outputs_of_runtime() + ), "The index:{} must be less than number of outputs:{}.".format( + index, self.num_outputs_of_runtime() + ) + return self._model.output_info_of_runtime(index) + + def enable_record_time_of_runtime(self): + self._model.enable_record_time_of_runtime() + + def disable_record_time_of_runtime(self): + self._model.disable_record_time_of_runtime() + + def print_statis_info_of_runtime(self): + return self._model.print_statis_info_of_runtime() + + def get_profile_time(self): + """Get profile time of Runtime after the profile process is done.""" + return self._model.get_profile_time() + + @property + def runtime_option(self): + return self._model.runtime_option if self._model is not None else None + + @property + def initialized(self): + if self._model is None: + return False + return self._model.initialized() diff --git a/libs/ultrainfer/python/ultrainfer/pipeline/__init__.py b/libs/ultrainfer/python/ultrainfer/pipeline/__init__.py new file mode 100755 index 0000000000..d5ff43ef02 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/pipeline/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .pptinypose import PPTinyPose diff --git a/libs/ultrainfer/python/ultrainfer/pipeline/pptinypose/__init__.py b/libs/ultrainfer/python/ultrainfer/pipeline/pptinypose/__init__.py new file mode 100755 index 0000000000..d0f0a27ab3 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/pipeline/pptinypose/__init__.py @@ -0,0 +1,58 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from ... import c_lib_wrap as C
+
+
+class PPTinyPose(object):
+    def __init__(self, det_model=None, pptinypose_model=None):
+        """Set the initialized detection model object and pptinypose model object
+
+        :param det_model: (ultrainfer.vision.detection.PicoDet)Initialized detection model object
+        :param pptinypose_model: (ultrainfer.vision.keypointdetection.PPTinyPose)Initialized pptinypose model object
+        """
+        assert (
+            det_model is not None and pptinypose_model is not None
+        ), "The det_model and pptinypose_model cannot be None."
+        self._pipeline = C.pipeline.PPTinyPose(
+            det_model._model, pptinypose_model._model
+        )
+
+    def predict(self, input_image):
+        """Predict the keypoint detection result for an input image
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: KeyPointDetectionResult
+        """
+        return self._pipeline.predict(input_image)
+
+    @property
+    def detection_model_score_threshold(self):
+        """Attribute of the PPTinyPose pipeline model: the score threshold used by the detection model to filter bboxes before they are passed to the pptinypose model
+
+        :return: value of detection_model_score_threshold(float)
+        """
+        return self._pipeline.detection_model_score_threshold
+
+    @detection_model_score_threshold.setter
+    def detection_model_score_threshold(self, value):
+        """Set attribute detection_model_score_threshold of the PPTinyPose pipeline model.
+
+        :param value: (float)The value to set detection_model_score_threshold
+        """
+        assert isinstance(
+            value, float
+        ), "The value to set `detection_model_score_threshold` must be type of float."
+        self._pipeline.detection_model_score_threshold = value
diff --git a/libs/ultrainfer/python/ultrainfer/py_only/__init__.py b/libs/ultrainfer/python/ultrainfer/py_only/__init__.py
new file mode 100755
index 0000000000..4437de5040
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/py_only/__init__.py
@@ -0,0 +1,16 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import ts, vision
+from .base import PyOnlyUltraInferModel, PyOnlyProcessor, PyOnlyProcessorChain
diff --git a/libs/ultrainfer/python/ultrainfer/py_only/base.py b/libs/ultrainfer/python/ultrainfer/py_only/base.py
new file mode 100755
index 0000000000..4edd522357
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/py_only/base.py
@@ -0,0 +1,59 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc +import logging + +from ..model import BaseUltraInferModel +from ..runtime import Runtime, RuntimeOption + +_logger = logging.getLogger(__name__) + + +class PyOnlyUltraInferModel(BaseUltraInferModel): + def __init__(self, option): + super().__init__() + if option is None: + self._option = RuntimeOption() + else: + self._option = option + self._update_option() + self._runtime = Runtime(self._option) + _logger.debug("Python-only model initialized") + + def num_inputs_of_runtime(self): + return self._runtime.num_inputs() + + def num_outputs_of_runtime(self): + return self._runtime.num_outputs() + + def _update_option(self): + pass + + +class PyOnlyProcessor(metaclass=abc.ABCMeta): + @abc.abstractmethod + def __call__(self, data): + raise NotImplementedError + + +class PyOnlyProcessorChain(object): + def __init__(self, processors): + super().__init__() + self._processors = processors + + def __call__(self, data): + for processor in self._processors: + data = processor(data) + return data diff --git a/libs/ultrainfer/python/ultrainfer/py_only/ts/__init__.py b/libs/ultrainfer/python/ultrainfer/py_only/ts/__init__.py new file mode 100755 index 0000000000..6de43119b9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/py_only/ts/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import processors +from .model import PyOnlyTSModel diff --git a/libs/ultrainfer/python/ultrainfer/py_only/ts/model.py b/libs/ultrainfer/python/ultrainfer/py_only/ts/model.py new file mode 100755 index 0000000000..8a996fb297 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/py_only/ts/model.py @@ -0,0 +1,25 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
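# The time-series models and processors in this package subclass the helpers
# from ..base.  A minimal sketch of how PyOnlyProcessor subclasses compose via
# PyOnlyProcessorChain; the two toy processors here are purely illustrative:
#
#     from ultrainfer.py_only import PyOnlyProcessor, PyOnlyProcessorChain
#
#     class AddOne(PyOnlyProcessor):
#         def __call__(self, data):
#             return {**data, "x": data["x"] + 1}
#
#     class Double(PyOnlyProcessor):
#         def __call__(self, data):
#             return {**data, "x": data["x"] * 2}
#
#     chain = PyOnlyProcessorChain([AddOne(), Double()])
#     assert chain({"x": 1})["x"] == 4  # (1 + 1) * 2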
+ +import abc +from ..base import PyOnlyUltraInferModel + + +class PyOnlyTSModel(PyOnlyUltraInferModel): + @abc.abstractmethod + def batch_predict(self, ts_list): + raise NotImplementedError + + def predict(self, ts): + return self.batch_predict([ts])[0] diff --git a/libs/ultrainfer/python/ultrainfer/py_only/ts/processors.py b/libs/ultrainfer/python/ultrainfer/py_only/ts/processors.py new file mode 100755 index 0000000000..cdc72c94d9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/py_only/ts/processors.py @@ -0,0 +1,582 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Optional, Union, Dict + +import chinese_calendar +import joblib +import numpy as np +import pandas as pd +from pandas.tseries.offsets import DateOffset, Easter, Day +from pandas.tseries import holiday as hd +from sklearn.preprocessing import StandardScaler + +from ..base import PyOnlyProcessor + +__all__ = [ + "CutOff", + "Normalize", + "Denormalize", + "BuildTSDataset", + "CalcTimeFeatures", + "BuildPaddedMask", + "DataFrame2Arrays", +] + +_MAX_WINDOW = 183 + 17 +_EASTER_SUNDAY = hd.Holiday("Easter Sunday", month=1, day=1, offset=[Easter(), Day(0)]) +_NEW_YEARS_DAY = hd.Holiday("New Years Day", month=1, day=1) +_SUPER_BOWL = hd.Holiday( + "Superbowl", month=2, day=1, offset=DateOffset(weekday=hd.SU(1)) +) +_MOTHERS_DAY = hd.Holiday( + "Mothers Day", month=5, day=1, offset=DateOffset(weekday=hd.SU(2)) +) +_INDEPENDENCE_DAY = hd.Holiday("Independence Day", month=7, day=4) +_CHRISTMAS_EVE = hd.Holiday("Christmas", month=12, day=24) +_CHRISTMAS_DAY = hd.Holiday("Christmas", month=12, day=25) +_NEW_YEARS_EVE = hd.Holiday("New Years Eve", month=12, day=31) +_BLACK_FRIDAY = hd.Holiday( + "Black Friday", + month=11, + day=1, + offset=[pd.DateOffset(weekday=hd.TH(4)), Day(1)], +) +_CYBER_MONDAY = hd.Holiday( + "Cyber Monday", + month=11, + day=1, + offset=[pd.DateOffset(weekday=hd.TH(4)), Day(4)], +) + +_HOLYDAYS = [ + hd.EasterMonday, + hd.GoodFriday, + hd.USColumbusDay, + hd.USLaborDay, + hd.USMartinLutherKingJr, + hd.USMemorialDay, + hd.USPresidentsDay, + hd.USThanksgivingDay, + _EASTER_SUNDAY, + _NEW_YEARS_DAY, + _SUPER_BOWL, + _MOTHERS_DAY, + _INDEPENDENCE_DAY, + _CHRISTMAS_EVE, + _CHRISTMAS_DAY, + _NEW_YEARS_EVE, + _BLACK_FRIDAY, + _CYBER_MONDAY, +] + + +def _cal_year( + x: np.datetime64, +): + return x.year + + +def _cal_month( + x: np.datetime64, +): + return x.month + + +def _cal_day( + x: np.datetime64, +): + return x.day + + +def _cal_hour( + x: np.datetime64, +): + return x.hour + + +def _cal_weekday( + x: np.datetime64, +): + return x.dayofweek + + +def _cal_quarter( + x: np.datetime64, +): + return x.quarter + + +def _cal_hourofday( + x: np.datetime64, +): + return x.hour / 23.0 - 0.5 + + +def _cal_dayofweek( + x: np.datetime64, +): + return x.dayofweek / 6.0 - 0.5 + + +def _cal_dayofmonth( + x: np.datetime64, +): + return x.day / 30.0 - 0.5 + + +def _cal_dayofyear( + x: np.datetime64, +): + return x.dayofyear / 364.0 - 0.5 + 
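# A quick worked example of the scaled calendar features defined above and
# below, which map into roughly [-0.5, 0.5] (pd.Timestamp exposes the
# .hour/.day/.month attributes used here):
#
#     ts = pd.Timestamp("2024-06-15 12:30")
#     _cal_hourofday(ts)    # 12 / 23.0 - 0.5  ≈  0.022
#     _cal_dayofmonth(ts)   # 15 / 30.0 - 0.5  =  0.0
#     _cal_monthofyear(ts)  # 6 / 11.0 - 0.5   ≈  0.045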
+ +def _cal_weekofyear( + x: np.datetime64, +): + return x.weekofyear / 51.0 - 0.5 + + +def _cal_holiday( + x: np.datetime64, +): + return float(chinese_calendar.is_holiday(x)) + + +def _cal_workday( + x: np.datetime64, +): + return float(chinese_calendar.is_workday(x)) + + +def _cal_minuteofhour( + x: np.datetime64, +): + return x.minute / 59 - 0.5 + + +def _cal_monthofyear( + x: np.datetime64, +): + return x.month / 11.0 - 0.5 + + +_CAL_DATE_METHOD = { + "year": _cal_year, + "month": _cal_month, + "day": _cal_day, + "hour": _cal_hour, + "weekday": _cal_weekday, + "quarter": _cal_quarter, + "minuteofhour": _cal_minuteofhour, + "monthofyear": _cal_monthofyear, + "hourofday": _cal_hourofday, + "dayofweek": _cal_dayofweek, + "dayofmonth": _cal_dayofmonth, + "dayofyear": _cal_dayofyear, + "weekofyear": _cal_weekofyear, + "is_holiday": _cal_holiday, + "is_workday": _cal_workday, +} + + +def _load_from_one_dataframe( + data: Union[pd.DataFrame, pd.Series], + time_col: Optional[str] = None, + value_cols: Optional[Union[List[str], str]] = None, + freq: Optional[Union[str, int]] = None, + drop_tail_nan: bool = False, + dtype: Optional[Union[type, Dict[str, type]]] = None, +): + series_data = None + if value_cols is None: + if isinstance(data, pd.Series): + series_data = data.copy() + else: + series_data = data.loc[:, data.columns != time_col].copy() + else: + series_data = data.loc[:, value_cols].copy() + + if time_col: + if time_col not in data.columns: + raise ValueError( + "The time column: {} doesn't exist in the `data`!".format(time_col) + ) + time_col_vals = data.loc[:, time_col] + else: + time_col_vals = data.index + + if np.issubdtype(time_col_vals.dtype, np.integer) and isinstance(freq, str): + time_col_vals = time_col_vals.astype(str) + + if np.issubdtype(time_col_vals.dtype, np.integer): + if freq: + if not isinstance(freq, int) or freq < 1: + raise ValueError( + "The type of `freq` should be `int` when the type of `time_col` is `RangeIndex`." + ) + else: + freq = 1 + start_idx, stop_idx = min(time_col_vals), max(time_col_vals) + freq + if (stop_idx - start_idx) / freq != len(data): + raise ValueError("The number of rows doesn't match with the RangeIndex!") + time_index = pd.RangeIndex(start=start_idx, stop=stop_idx, step=freq) + elif np.issubdtype(time_col_vals.dtype, np.object_) or np.issubdtype( + time_col_vals.dtype, np.datetime64 + ): + time_col_vals = pd.to_datetime(time_col_vals, infer_datetime_format=True) + time_index = pd.DatetimeIndex(time_col_vals) + if freq: + if not isinstance(freq, str): + raise ValueError( + "The type of `freq` should be `str` when the type of `time_col` is `DatetimeIndex`." + ) + else: + # If freq is not provided and automatic inference fail, throw exception + freq = pd.infer_freq(time_index) + if freq is None: + raise ValueError( + "Failed to infer the `freq`. A valid `freq` is required." 
+ ) + if freq[0] == "-": + freq = freq[1:] + else: + raise ValueError("The type of `time_col` is invalid.") + if isinstance(series_data, pd.Series): + series_data = series_data.to_frame() + series_data.set_index(time_index, inplace=True) + series_data.sort_index(inplace=True) + return series_data + + +def _load_from_dataframe( + df: pd.DataFrame, + group_id: str = None, + time_col: Optional[str] = None, + target_cols: Optional[Union[List[str], str]] = None, + label_col: Optional[Union[List[str], str]] = None, + observed_cov_cols: Optional[Union[List[str], str]] = None, + feature_cols: Optional[Union[List[str], str]] = None, + known_cov_cols: Optional[Union[List[str], str]] = None, + static_cov_cols: Optional[Union[List[str], str]] = None, + freq: Optional[Union[str, int]] = None, + fill_missing_dates: bool = False, + fillna_method: str = "pre", + fillna_window_size: int = 10, + **kwargs, +): + dfs = [] # seperate multiple group + if group_id is not None: + group_unique = df[group_id].unique() + for column in group_unique: + dfs.append(df[df[group_id].isin([column])]) + else: + dfs = [df] + res = [] + if label_col: + if isinstance(label_col, str) and len(label_col) > 1: + raise ValueError("The length of label_col must be 1.") + target_cols = label_col + if feature_cols: + observed_cov_cols = feature_cols + for df in dfs: + target = None + observed_cov = None + known_cov = None + static_cov = dict() + if not any([target_cols, observed_cov_cols, known_cov_cols, static_cov_cols]): + target = _load_from_one_dataframe( + df, + time_col, + [a for a in df.columns if a != time_col], + freq, + ) + + else: + if target_cols: + target = _load_from_one_dataframe( + df, + time_col, + target_cols, + freq, + ) + + if observed_cov_cols: + observed_cov = _load_from_one_dataframe( + df, + time_col, + observed_cov_cols, + freq, + ) + + if known_cov_cols: + known_cov = _load_from_one_dataframe( + df, + time_col, + known_cov_cols, + freq, + ) + + if static_cov_cols: + if isinstance(static_cov_cols, str): + static_cov_cols = [static_cov_cols] + for col in static_cov_cols: + if col not in df.columns or len(np.unique(df[col])) != 1: + raise ValueError( + "static cov cals data is not in columns or schema is not right!" + ) + static_cov[col] = df[col].iloc[0] + res.append( + { + "past_target": target, + "observed_cov_numeric": observed_cov, + "known_cov_numeric": known_cov, + "static_cov_numeric": static_cov, + } + ) + return res[0] + + +def _distance_to_holiday(holiday): + def _distance_to_day(index): + holiday_date = holiday.dates( + index - pd.Timedelta(days=_MAX_WINDOW), + index + pd.Timedelta(days=_MAX_WINDOW), + ) + assert ( + len(holiday_date) != 0 + ), f"No closest holiday for the date index {index} found." + # It sometimes returns two dates if it is exactly half a year after the + # holiday. In this case, the smaller distance (182 days) is returned. 
+ return float((index - holiday_date[0]).days) + + return _distance_to_day + + +def _to_time_features( + dataset, freq, feature_cols, extend_points, inplace: bool = False +): + new_ts = dataset + if not inplace: + new_ts = dataset.copy() + # Get known_cov + kcov = new_ts["known_cov_numeric"] + if not kcov: + tf_kcov = new_ts["past_target"].index.to_frame() + else: + tf_kcov = kcov.index.to_frame() + time_col = tf_kcov.columns[0] + if np.issubdtype(tf_kcov[time_col].dtype, np.integer): + raise ValueError( + "The time_col can't be the type of numpy.integer, and it must be the type of numpy.datetime64" + ) + if not kcov: + freq = freq if freq is not None else pd.infer_freq(tf_kcov[time_col]) + extend_time = pd.date_range( + start=tf_kcov[time_col][-1], + freq=freq, + periods=extend_points + 1, + closed="right", + name=time_col, + ).to_frame() + tf_kcov = pd.concat([tf_kcov, extend_time]) + + for k in feature_cols: + if k != "holidays": + v = tf_kcov[time_col].apply(lambda x: _CAL_DATE_METHOD[k](x)) + v.index = tf_kcov[time_col] + + if new_ts["known_cov_numeric"] is None: + new_ts["known_cov_numeric"] = pd.DataFrame(v.rename(k), index=v.index) + else: + new_ts["known_cov_numeric"][k] = v.rename(k).reindex( + new_ts["known_cov_numeric"].index + ) + + else: + holidays_col = [] + for i, H in enumerate(_HOLYDAYS): + v = tf_kcov[time_col].apply(_distance_to_holiday(H)) + v.index = tf_kcov[time_col] + holidays_col.append(k + "_" + str(i)) + if new_ts["known_cov_numeric"] is None: + new_ts["known_cov_numeric"] = pd.DataFrame( + v.rename(k + "_" + str(i)), index=v.index + ) + else: + new_ts["known_cov_numeric"][k + "_" + str(i)] = v.rename(k).reindex( + new_ts["known_cov_numeric"].index + ) + + scaler = StandardScaler() + scaler.fit(new_ts["known_cov_numeric"][holidays_col]) + new_ts["known_cov_numeric"][holidays_col] = scaler.transform( + new_ts["known_cov_numeric"][holidays_col] + ) + return new_ts + + +class CutOff(PyOnlyProcessor): + def __init__(self, size): + super().__init__() + self._size = size + + def __call__(self, data): + ts = data["ts"] + ori_ts = data["ori_ts"] + + skip_len = self._size.get("skip_chunk_len", 0) + if len(ts) < self._size["in_chunk_len"] + skip_len: + raise ValueError( + f"The length of the input data is {len(ts)}, but it should be at least {self._size['in_chunk_len'] + self._size['skip_chunk_len']} for training." 
+ ) + ts_data = ts[-(self._size["in_chunk_len"] + skip_len) :] + + return {**data, "ts": ts_data, "ori_ts": ts_data} + + +class Normalize(PyOnlyProcessor): + def __init__(self, scale_path, params_info): + super().__init__() + self._scaler = joblib.load(scale_path) + self._params_info = params_info + + def __call__(self, data): + ts = data["ts"] + + if self._params_info.get("target_cols", None) is not None: + ts[self._params_info["target_cols"]] = self._scaler.transform( + ts[self._params_info["target_cols"]] + ) + if self._params_info.get("feature_cols", None) is not None: + ts[self._params_info["feature_cols"]] = self._scaler.transform( + ts[self._params_info["feature_cols"]] + ) + + return {**data, "ts": ts} + + +class Denormalize(PyOnlyProcessor): + def __init__(self, scale_path, params_info): + super().__init__() + self._scaler = joblib.load(scale_path) + self._params_info = params_info + + def __call__(self, data): + pred = data["pred"] + + scale_cols = pred.columns.values.tolist() + pred[scale_cols] = self._scaler.inverse_transform(pred[scale_cols]) + + return {**data, "pred": pred} + + +class BuildTSDataset(PyOnlyProcessor): + def __init__(self, params_info): + super().__init__() + self._params_info = params_info + + def __call__(self, data): + ts = data["ts"] + ori_ts = data["ori_ts"] + + ts_data = _load_from_dataframe(ts, **self._params_info) + + return {**data, "ts": ts_data, "ori_ts": ts_data} + + +class CalcTimeFeatures(PyOnlyProcessor): + def __init__(self, params_info, size, holiday=False): + super().__init__() + self._freq = params_info["freq"] + self._size = size + self._holiday = holiday + + def __call__(self, data): + ts = data["ts"] + + if not self._holiday: + ts = _to_time_features( + ts, + self._freq, + ["hourofday", "dayofmonth", "dayofweek", "dayofyear"], + self._size["out_chunk_len"], + ) + else: + ts = _to_time_features( + ts, + self._freq, + [ + "minuteofhour", + "hourofday", + "dayofmonth", + "dayofweek", + "dayofyear", + "monthofyear", + "weekofyear", + "holidays", + ], + self._size["out_chunk_len"], + ) + + return {**data, "ts": ts} + + +class BuildPaddedMask(PyOnlyProcessor): + def __init__(self, input_data): + super().__init__() + self._input_data = input_data + + def __call__(self, data): + ts = data["ts"] + + if "features" in self._input_data: + ts["features"] = ts["past_target"] + + if "pad_mask" in self._input_data: + target_dim = len(ts["features"]) + max_length = self._input_data["pad_mask"][-1] + if max_length > 0: + ones = np.ones(max_length, dtype=np.int32) + if max_length != target_dim: + target_ndarray = np.array(ts["features"]).astype(np.float32) + target_ndarray_final = np.zeros( + [max_length, target_dim], dtype=np.int32 + ) + end = min(target_dim, max_length) + target_ndarray_final[:end, :] = target_ndarray + ts["features"] = target_ndarray_final + ones[end:] = 0.0 + ts["pad_mask"] = ones + else: + ts["pad_mask"] = ones + + return {**data, "ts": ts} + + +class DataFrame2Arrays(PyOnlyProcessor): + def __init__(self, input_data): + super().__init__() + self._input_data = input_data + + def __call__(self, data): + ts = data["ts"] + + ts_list = [] + input_name = list(self._input_data.keys()) + input_name.sort() + for key in input_name: + ts_list.append(np.array(ts[key]).astype("float32")) + + return {**data, "ts": ts_list} diff --git a/libs/ultrainfer/python/ultrainfer/py_only/vision/__init__.py b/libs/ultrainfer/python/ultrainfer/py_only/vision/__init__.py new file mode 100755 index 0000000000..784aa87714 --- /dev/null +++ 
b/libs/ultrainfer/python/ultrainfer/py_only/vision/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import processors +from .model import PyOnlyVisionModel diff --git a/libs/ultrainfer/python/ultrainfer/py_only/vision/model.py b/libs/ultrainfer/python/ultrainfer/py_only/vision/model.py new file mode 100755 index 0000000000..0be93de2fa --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/py_only/vision/model.py @@ -0,0 +1,26 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc + +from ..base import PyOnlyUltraInferModel + + +class PyOnlyVisionModel(PyOnlyUltraInferModel): + @abc.abstractmethod + def batch_predict(self, imgs): + raise NotImplementedError + + def predict(self, img): + return self.batch_predict([img])[0] diff --git a/libs/ultrainfer/python/ultrainfer/py_only/vision/processors.py b/libs/ultrainfer/python/ultrainfer/py_only/vision/processors.py new file mode 100755 index 0000000000..ba534854ca --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/py_only/vision/processors.py @@ -0,0 +1,465 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
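# A minimal sketch of how the processors defined below are typically chained
# for image preprocessing; the input path and the resize/normalization
# parameters are illustrative only:
#
#     import cv2
#     from ultrainfer.py_only import PyOnlyProcessorChain
#     from ultrainfer.py_only.vision import processors as P
#
#     chain = PyOnlyProcessorChain([
#         P.BGR2RGB(),
#         P.Resize(target_size=[224, 224]),
#         P.Normalize(scale=1.0 / 255, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
#         P.ToCHWImage(),
#     ])
#     data = chain({"img": cv2.imread("demo.jpg")})
#     chw_image = data["img"]  # float32 array in CHW layout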
+ +import math + +import numpy as np +import cv2 +from PIL import Image + +from ..base import PyOnlyProcessor + +__all__ = [ + "GetImageInfo", + "Flip", + "Crop", + "Resize", + "ResizeByLong", + "ResizeByShort", + "Pad", + "PadStride", + "Normalize", + "ToCHWImage", + "LaTeXOCRReisizeNormImg", +] + + +def _resize(im, target_size, interp): + w, h = target_size + im = cv2.resize(im, (w, h), interpolation=interp) + return im + + +def _flip_h(im): + if len(im.shape) == 3: + im = im[:, ::-1, :] + elif len(im.shape) == 2: + im = im[:, ::-1] + return im + + +def _flip_v(im): + if len(im.shape) == 3: + im = im[::-1, :, :] + elif len(im.shape) == 2: + im = im[::-1, :] + return im + + +def _slice(im, coords): + x1, y1, x2, y2 = coords + im = im[y1:y2, x1:x2, ...] + return im + + +def _pad(im, pad, val): + if isinstance(pad, int): + pad = [pad] * 4 + if len(pad) != 4: + raise ValueError + chns = 1 if im.ndim == 2 else im.shape[2] + im = cv2.copyMakeBorder(im, *pad, cv2.BORDER_CONSTANT, value=(val,) * chns) + return im + + +def _check_image_size(input_): + if not ( + isinstance(input_, (list, tuple)) + and len(input_) == 2 + and isinstance(input_[0], int) + and isinstance(input_[1], int) + ): + raise TypeError(f"{input_} cannot represent a valid image size.") + + +class GetImageInfo(PyOnlyProcessor): + def __call__(self, data): + img = data["img"] + + return {**data, "img_size": [img.shape[1], img.shape[0]]} + + +class Flip(PyOnlyProcessor): + def __init__(self, mode="H"): + super().__init__() + if mode not in ("H", "V"): + raise ValueError("`mode` should be 'H' or 'V'.") + self._mode = mode + + def __call__(self, data): + img = data["img"] + + if self._mode == "H": + img = _flip_h(img) + elif self._mode == "V": + img = _flip_v(img) + + return {**data, "img": img} + + +class Crop(PyOnlyProcessor): + def __init__(self, crop_size, mode="C"): + super().__init__() + if isinstance(crop_size, int): + crop_size = [crop_size, crop_size] + _check_image_size(crop_size) + + self._crop_size = crop_size + + if mode not in ("C", "TL"): + raise ValueError("Unsupported interpolation method") + self._mode = mode + + def __call__(self, data): + img = data["img"] + + h, w = img.shape[:2] + cw, ch = self._crop_size + if self._mode == "C": + x1 = max(0, (w - cw) // 2) + y1 = max(0, (h - ch) // 2) + elif self._mode == "TL": + x1, y1 = 0, 0 + x2 = min(w, x1 + cw) + y2 = min(h, y1 + ch) + coords = (x1, y1, x2, y2) + if coords == (0, 0, w, h): + raise ValueError( + f"Input image ({w}, {h}) smaller than the target size ({cw}, {ch})." + ) + img = _slice(img, coords=coords) + + return {**data, "img": img, "img_size": [img.shape[1], img.shape[0]]} + + +class _BaseResize(PyOnlyProcessor): + _INTERP_DICT = { + "NEAREST": cv2.INTER_NEAREST, + "LINEAR": cv2.INTER_LINEAR, + "CUBIC": cv2.INTER_CUBIC, + "AREA": cv2.INTER_AREA, + "LANCZOS4": cv2.INTER_LANCZOS4, + } + + def __init__(self, size_divisor, interp): + super().__init__() + + if size_divisor is not None: + assert isinstance( + size_divisor, int + ), "`size_divisor` should be None or int." 
+ self._size_divisor = size_divisor + + try: + interp = self._INTERP_DICT[interp] + except KeyError: + raise ValueError( + "`interp` should be one of {}.".format(self._INTERP_DICT.keys()) + ) + self._interp = interp + + @staticmethod + def _rescale_size(img_size, target_size): + scale = min(max(target_size) / max(img_size), min(target_size) / min(img_size)) + rescaled_size = [round(i * scale) for i in img_size] + return rescaled_size, scale + + +class Resize(_BaseResize): + def __init__( + self, target_size, keep_ratio=False, size_divisor=None, interp="LINEAR" + ): + super().__init__(size_divisor=size_divisor, interp=interp) + + if isinstance(target_size, int): + target_size = [target_size, target_size] + _check_image_size(target_size) + self._target_size = target_size + + self._keep_ratio = keep_ratio + + def __call__(self, data): + img = data["img"] + + target_size = self._target_size + original_size = img.shape[:2][::-1] + + if self._keep_ratio: + h, w = img.shape[0:2] + target_size, _ = self._rescale_size((w, h), self._target_size) + + if self._size_divisor: + target_size = [ + math.ceil(i / self._size_divisor) * self._size_divisor + for i in target_size + ] + + img_scale_w, img_scale_h = [ + target_size[0] / original_size[0], + target_size[1] / original_size[1], + ] + img = _resize(img, target_size, interp=self._interp) + + return { + **data, + "img": img, + "img_size": [img.shape[1], img.shape[0]], + "scale_factors": [img_scale_w, img_scale_h], + } + + +class ResizeByLong(_BaseResize): + def __init__(self, target_long_edge, size_divisor=None, interp="LINEAR"): + super().__init__(size_divisor=size_divisor, interp=interp) + self._target_long_edge = target_long_edge + + def __call__(self, data): + img = data["img"] + + h, w = img.shape[:2] + scale = self._target_long_edge / max(h, w) + h_resize = round(h * scale) + w_resize = round(w * scale) + if self._size_divisor is not None: + h_resize = math.ceil(h_resize / self._size_divisor) * self._size_divisor + w_resize = math.ceil(w_resize / self._size_divisor) * self._size_divisor + + img = _resize(img, (w_resize, h_resize), interp=self._interp) + + return {**data, "img": img, "img_size": [img.shape[1], img.shape[0]]} + + +class ResizeByShort(_BaseResize): + INPUT_KEYS = "img" + OUTPUT_KEYS = ["img", "img_size"] + DEAULT_INPUTS = {"img": "img"} + DEAULT_OUTPUTS = {"img": "img", "img_size": "img_size"} + + def __init__(self, target_short_edge, size_divisor=None, interp="LINEAR"): + super().__init__(size_divisor=size_divisor, interp=interp) + self._target_short_edge = target_short_edge + + def __call__(self, data): + img = data["img"] + + h, w = img.shape[:2] + scale = self._target_short_edge / min(h, w) + h_resize = round(h * scale) + w_resize = round(w * scale) + if self._size_divisor is not None: + h_resize = math.ceil(h_resize / self._size_divisor) * self._size_divisor + w_resize = math.ceil(w_resize / self._size_divisor) * self._size_divisor + + img = _resize(img, (w_resize, h_resize), interp=self._interp) + + return {**data, "img": img, "img_size": [img.shape[1], img.shape[0]]} + + +class Pad(PyOnlyProcessor): + def __init__(self, target_size, val=127.5): + super().__init__() + + if isinstance(target_size, int): + target_size = [target_size, target_size] + _check_image_size(target_size) + self._target_size = target_size + + self._val = val + + def __call__(self, data): + img = data["img"] + + h, w = img.shape[:2] + tw, th = self._target_size + ph = th - h + pw = tw - w + + if ph < 0 or pw < 0: + raise ValueError( + f"Input image ({w}, 
{h}) smaller than the target size ({tw}, {th})." + ) + else: + img = _pad(img, pad=(0, ph, 0, pw), val=self._val) + + return {**data, "img": img, "img_size": [img.shape[1], img.shape[0]]} + + +class PadStride(PyOnlyProcessor): + INPUT_KEYS = "img" + OUTPUT_KEYS = "img" + DEAULT_INPUTS = {"img": "img"} + DEAULT_OUTPUTS = {"img": "img"} + + def __init__(self, stride=0): + super().__init__() + self._coarsest_stride = stride + + def __call__(self, data): + img = data["img"] + + im = img + coarsest_stride = self._coarsest_stride + if coarsest_stride <= 0: + return {"img": im} + im_c, im_h, im_w = im.shape + pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) + pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) + padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = im + + return {**data, "img": padding_im} + + +class Normalize(PyOnlyProcessor): + def __init__(self, scale=1.0 / 255, mean=0.5, std=0.5, preserve_dtype=False): + super().__init__() + self._scale = np.float32(scale) + if isinstance(mean, float): + mean = [mean] + self._mean = np.asarray(mean).astype("float32") + if isinstance(std, float): + std = [std] + self._std = np.asarray(std).astype("float32") + self._preserve_dtype = preserve_dtype + + def __call__(self, data): + img = data["img"] + + old_type = img.dtype + # XXX: If `old_type` has higher precision than float32, + # we will lose some precision. + img = img.astype("float32", copy=False) + img *= self._scale + img -= self._mean + img /= self._std + if self._preserve_dtype: + img = img.astype(old_type, copy=False) + + return {**data, "img": img} + + +class ToCHWImage(PyOnlyProcessor): + def __call__(self, data): + img = data["img"] + + img = img.transpose((2, 0, 1)) + + return {**data, "img": img} + + +class BGR2RGB(PyOnlyProcessor): + def __call__(self, data): + img = data["img"] + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + return {**data, "img": img} + + +class LaTeXOCRReisizeNormImg(PyOnlyProcessor): + """for ocr image resize and normalization""" + + def __init__(self, rec_image_shape=(3, 48, 320)): + super().__init__() + self.rec_image_shape = rec_image_shape + + def pad_(self, img, divable=32): + threshold = 128 + data = np.array(img.convert("LA")) + if data[..., -1].var() == 0: + data = (data[..., 0]).astype(np.uint8) + else: + data = (255 - data[..., -1]).astype(np.uint8) + data = (data - data.min()) / (data.max() - data.min()) * 255 + if data.mean() > threshold: + # To invert the text to white + gray = 255 * (data < threshold).astype(np.uint8) + else: + gray = 255 * (data > threshold).astype(np.uint8) + data = 255 - data + + coords = cv2.findNonZero(gray) # Find all non-zero points (text) + a, b, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box + rect = data[b : b + h, a : a + w] + im = Image.fromarray(rect).convert("L") + dims = [] + for x in [w, h]: + div, mod = divmod(x, divable) + dims.append(divable * (div + (1 if mod > 0 else 0))) + padded = Image.new("L", dims, 255) + padded.paste(im, (0, 0, im.size[0], im.size[1])) + return padded + + def minmax_size_( + self, + img, + max_dimensions, + min_dimensions, + ): + if max_dimensions is not None: + ratios = [a / b for a, b in zip(img.size, max_dimensions)] + if any([r > 1 for r in ratios]): + size = np.array(img.size) // max(ratios) + img = img.resize(tuple(size.astype(int)), Image.BILINEAR) + if min_dimensions is not None: + # hypothesis: there is a dim in img smaller than min_dimensions, and return a proper dim >= 
min_dimensions + padded_size = [ + max(img_dim, min_dim) + for img_dim, min_dim in zip(img.size, min_dimensions) + ] + if padded_size != list(img.size): # assert hypothesis + padded_im = Image.new("L", padded_size, 255) + padded_im.paste(img, img.getbbox()) + img = padded_im + return img + + def norm_img_latexocr(self, img): + # CAN only predict gray scale image + shape = (1, 1, 3) + mean = [0.7931, 0.7931, 0.7931] + std = [0.1738, 0.1738, 0.1738] + scale = np.float32(1.0 / 255.0) + min_dimensions = [32, 32] + max_dimensions = [672, 192] + mean = np.array(mean).reshape(shape).astype("float32") + std = np.array(std).reshape(shape).astype("float32") + + im_h, im_w = img.shape[:2] + if ( + min_dimensions[0] <= im_w <= max_dimensions[0] + and min_dimensions[1] <= im_h <= max_dimensions[1] + ): + pass + else: + img = Image.fromarray(np.uint8(img)) + img = self.minmax_size_(self.pad_(img), max_dimensions, min_dimensions) + img = np.array(img) + im_h, im_w = img.shape[:2] + img = np.dstack([img, img, img]) + img = (img.astype("float32") * scale - mean) / std + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + divide_h = math.ceil(im_h / 16) * 16 + divide_w = math.ceil(im_w / 16) * 16 + img = np.pad( + img, ((0, divide_h - im_h), (0, divide_w - im_w)), constant_values=(1, 1) + ) + img = img[:, :, np.newaxis].transpose(2, 0, 1) + img = img.astype("float32") + return img + + def __call__(self, data): + """apply""" + img = data["img"] + img = self.norm_img_latexocr(img) + return {"img": img} diff --git a/libs/ultrainfer/python/ultrainfer/runtime.py b/libs/ultrainfer/python/ultrainfer/runtime.py new file mode 100755 index 0000000000..e558159295 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/runtime.py @@ -0,0 +1,706 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +import logging +import numpy as np +from . import ModelFormat +from . import c_lib_wrap as C + + +class Runtime: + """UltraInfer Runtime object.""" + + def __init__(self, runtime_option): + """Initialize a UltraInfer Runtime object. + + :param runtime_option: (ultrainfer.RuntimeOption)Options for UltraInfer Runtime + """ + + self._runtime = C.Runtime() + self.runtime_option = runtime_option + assert self._runtime.init( + self.runtime_option._option + ), "Initialize Runtime Failed!" + + def forward(self, *inputs): + """[Only for Poros backend] Inference with input data for poros + + :param data: (list[str : numpy.ndarray])The input data list + :return list of numpy.ndarray + """ + if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT: + raise Exception( + "The forward function is only used for Poros backend, please call infer function" + ) + inputs_dict = dict() + for i in range(len(inputs)): + inputs_dict["x" + str(i)] = inputs[i] + return self.infer(inputs_dict) + + def infer(self, data): + """Inference with input data. 
+ + :param data: (dict[str : numpy.ndarray])The input data dict, key value must keep same with the loaded model + :return list of numpy.ndarray + """ + assert isinstance(data, dict) or isinstance( + data, list + ), "The input data should be type of dict or list." + if isinstance(data, dict): + for k, v in data.items(): + if isinstance(v, np.ndarray) and not v.data.contiguous: + data[k] = np.ascontiguousarray(data[k]) + + return self._runtime.infer(data) + + def bind_input_tensor(self, name, fdtensor): + """Bind FDTensor by name, no copy and share input memory + + :param name: (str)The name of input data. + :param fdtensor: (ultrainfer.FDTensor)The input FDTensor. + """ + self._runtime.bind_input_tensor(name, fdtensor) + + def bind_output_tensor(self, name, fdtensor): + """Bind FDTensor by name, no copy and share output memory + + :param name: (str)The name of output data. + :param fdtensor: (ultrainfer.FDTensor)The output FDTensor. + """ + self._runtime.bind_output_tensor(name, fdtensor) + + def zero_copy_infer(self): + """No params inference the model. + + the input and output data need to pass through the bind_input_tensor and get_output_tensor interfaces. + """ + self._runtime.infer() + + def get_output_tensor(self, name): + """Get output FDTensor by name, no copy and share backend output memory + + :param name: (str)The name of output data. + :return ultrainfer.FDTensor + """ + return self._runtime.get_output_tensor(name) + + def compile(self, warm_datas): + """[Only for Poros backend] compile with prewarm data for poros + + :param data: (list[str : numpy.ndarray])The prewarm data list + :return TorchScript Model + """ + if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT: + raise Exception( + "The compile function is only used for Poros backend, please call infer function" + ) + assert isinstance(warm_datas, list), "The prewarm data should be type of list." + for i in range(len(warm_datas)): + warm_data = warm_datas[i] + if isinstance(warm_data[0], np.ndarray): + warm_data = list(data for data in warm_data) + else: + warm_data = list(data.numpy() for data in warm_data) + warm_datas[i] = warm_data + return self._runtime.compile(warm_datas, self.runtime_option._option) + + def num_inputs(self): + """Get number of inputs of the loaded model.""" + return self._runtime.num_inputs() + + def num_outputs(self): + """Get number of outputs of the loaded model.""" + return self._runtime.num_outputs() + + def get_input_info(self, index): + """Get input information of the loaded model. + + :param index: (int)Index of the input + :return ultrainfer.TensorInfo + """ + assert isinstance( + index, int + ), "The input parameter index should be type of int." + assert ( + index < self.num_inputs() + ), "The input parameter index:{} should less than number of inputs:{}.".format( + index, self.num_inputs + ) + return self._runtime.get_input_info(index) + + def get_output_info(self, index): + """Get output information of the loaded model. + + :param index: (int)Index of the output + :return ultrainfer.TensorInfo + """ + assert isinstance( + index, int + ), "The input parameter index should be type of int." 
+ assert ( + index < self.num_outputs() + ), "The input parameter index:{} should less than number of outputs:{}.".format( + index, self.num_outputs + ) + return self._runtime.get_output_info(index) + + def get_profile_time(self): + """Get profile time of Runtime after the profile process is done.""" + return self._runtime.get_profile_time() + + +class RuntimeOption: + """Options for UltraInfer Runtime.""" + + __slots__ = ["_option"] + + def __init__(self): + """Initialize a UltraInfer RuntimeOption object.""" + + self._option = C.RuntimeOption() + + def set_model_path( + self, model_path, params_path="", model_format=ModelFormat.PADDLE + ): + """Set path of model file and parameters file + + :param model_path: (str)Path of model file + :param params_path: (str)Path of parameters file + :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT + """ + return self._option.set_model_path(model_path, params_path, model_format) + + def set_model_buffer( + self, model_buffer, params_buffer="", model_format=ModelFormat.PADDLE + ): + """Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory + :param model_buffer: (bytes)The memory buffer of model + :param params_buffer: (bytes)The memory buffer of the parameters + :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT + """ + return self._option.set_model_buffer(model_buffer, params_buffer, model_format) + + def use_gpu(self, device_id=0): + """Inference with Nvidia GPU + + :param device_id: (int)The index of GPU will be used for inference, default 0 + """ + if not C.is_built_with_gpu(): + logging.warning( + "The installed ultrainfer-python package is not built with GPU, will force to use CPU. To use GPU, following the commands to install ultrainfer-gpu-python." + ) + return + return self._option.use_gpu(device_id) + + def use_kunlunxin( + self, + device_id=0, + l3_workspace_size=16 * 1024 * 1024, + locked=False, + autotune=True, + autotune_file="", + precision="int16", + adaptive_seqlen=False, + enable_multi_stream=False, + gm_default_size=0, + ): + """Inference with KunlunXin XPU + + :param device_id: (int)The index of KunlunXin XPU will be used for inference, default 0 + :param l3_workspace_size: (int)The size of the video memory allocated by the l3 cache, the maximum is 16M, default 16M + :param locked: (bool)Whether the allocated L3 cache can be locked. If false, it means that the L3 cache is not locked, + and the allocated L3 cache can be shared by multiple models, and multiple models + :param autotune: (bool)Whether to autotune the conv operator in the model. + If true, when the conv operator of a certain dimension is executed for the first time, + it will automatically search for a better algorithm to improve the performance of subsequent conv operators of the same dimension. + :param autotune_file: (str)Specify the path of the autotune file. If autotune_file is specified, + the algorithm specified in the file will be used and autotune will not be performed again. + :param precision: (str)Calculation accuracy of multi_encoder + :param adaptive_seqlen: (bool)adaptive_seqlen Is the input of multi_encoder variable length + :param enable_multi_stream: (bool)Whether to enable the multi stream of KunlunXin XPU. + :param gm_default_size The default size of context global memory of KunlunXin XPU. 
+ """ + return self._option.use_kunlunxin( + device_id, + l3_workspace_size, + locked, + autotune, + autotune_file, + precision, + adaptive_seqlen, + enable_multi_stream, + gm_default_size, + ) + + def use_cpu(self): + """Inference with CPU""" + return self._option.use_cpu() + + def use_rknpu2( + self, rknpu2_name=C.CpuName.RK356X, rknpu2_core=C.CoreMask.RKNN_NPU_CORE_AUTO + ): + return self._option.use_rknpu2(rknpu2_name, rknpu2_core) + + def use_sophgo(self): + """Inference with SOPHGO TPU""" + return self._option.use_sophgo() + + def use_ascend(self): + """Inference with Huawei Ascend NPU""" + return self._option.use_ascend() + + def disable_valid_backend_check(self): + """Disable checking validity of backend during inference""" + return self._option.disable_valid_backend_check() + + def enable_valid_backend_check(self): + """Enable checking validity of backend during inference""" + return self._option.enable_valid_backend_check() + + def set_cpu_thread_num(self, thread_num=-1): + """Set number of threads if inference with CPU + + :param thread_num: (int)Number of threads, if not positive, means the number of threads is decided by the backend, default -1 + """ + return self._option.set_cpu_thread_num(thread_num) + + def set_ort_graph_opt_level(self, level=-1): + """Set graph optimization level for ONNX Runtime backend + + :param level: (int)Optimization level, -1 means the default setting + """ + logging.warning( + "`RuntimeOption.set_ort_graph_opt_level` will be deprecated in v1.2.0, please use `RuntimeOption.graph_optimize_level = 99` instead." + ) + self._option.ort_option.graph_optimize_level = level + + def use_paddle_backend(self): + """Use Paddle Inference backend, support inference Paddle model on CPU/Nvidia GPU.""" + return self._option.use_paddle_backend() + + def use_paddle_infer_backend(self): + """Wrapper function of use_paddle_backend(), use Paddle Inference backend, support inference Paddle model on CPU/Nvidia GPU.""" + return self.use_paddle_backend() + + def use_poros_backend(self): + """Use Poros backend, support inference TorchScript model on CPU/Nvidia GPU.""" + return self._option.use_poros_backend() + + def use_ort_backend(self): + """Use ONNX Runtime backend, support inference Paddle/ONNX model on CPU/Nvidia GPU.""" + return self._option.use_ort_backend() + + def use_tvm_backend(self): + """Use TVM Runtime backend, support inference TVM model on CPU.""" + return self._option.use_tvm_backend() + + def use_trt_backend(self): + """Use TensorRT backend, support inference Paddle/ONNX model on Nvidia GPU.""" + return self._option.use_trt_backend() + + def use_openvino_backend(self): + """Use OpenVINO backend, support inference Paddle/ONNX model on CPU.""" + return self._option.use_openvino_backend() + + def use_lite_backend(self): + """Use Paddle Lite backend, support inference Paddle model on ARM CPU.""" + return self._option.use_lite_backend() + + def use_paddle_lite_backend(self): + """Wrapper function of use_lite_backend(), use Paddle Lite backend, support inference Paddle model on ARM CPU.""" + return self.use_lite_backend() + + def set_lite_context_properties(self, context_properties): + """Set nnadapter context properties for Paddle Lite backend.""" + logging.warning( + "`RuntimeOption.set_lite_context_properties` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_context_properties = ...` instead." 
+        )
+        self._option.paddle_lite_option.nnadapter_context_properties = (
+            context_properties
+        )
+
+    def set_lite_model_cache_dir(self, model_cache_dir):
+        """Set the nnadapter model cache dir for the Paddle Lite backend."""
+        logging.warning(
+            "`RuntimeOption.set_lite_model_cache_dir` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_model_cache_dir = ...` instead."
+        )
+
+        self._option.paddle_lite_option.nnadapter_model_cache_dir = model_cache_dir
+
+    def set_lite_dynamic_shape_info(self, dynamic_shape_info):
+        """Set the nnadapter dynamic shape info for the Paddle Lite backend."""
+        logging.warning(
+            "`RuntimeOption.set_lite_dynamic_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_dynamic_shape_info = ...` instead."
+        )
+        self._option.paddle_lite_option.nnadapter_dynamic_shape_info = (
+            dynamic_shape_info
+        )
+
+    def set_lite_subgraph_partition_path(self, subgraph_partition_path):
+        """Set the nnadapter subgraph partition path for the Paddle Lite backend."""
+        logging.warning(
+            "`RuntimeOption.set_lite_subgraph_partition_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_path = ...` instead."
+        )
+        self._option.paddle_lite_option.nnadapter_subgraph_partition_config_path = (
+            subgraph_partition_path
+        )
+
+    def set_lite_subgraph_partition_config_buffer(self, subgraph_partition_buffer):
+        """Set the nnadapter subgraph partition buffer for the Paddle Lite backend."""
+        logging.warning(
+            "`RuntimeOption.set_lite_subgraph_partition_config_buffer` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_buffer = ...` instead."
+        )
+        self._option.paddle_lite_option.nnadapter_subgraph_partition_config_buffer = (
+            subgraph_partition_buffer
+        )
+
+    def set_lite_mixed_precision_quantization_config_path(
+        self, mixed_precision_quantization_config_path
+    ):
+        """Set the nnadapter mixed precision quantization config path for the Paddle Lite backend."""
+        logging.warning(
+            "`RuntimeOption.set_lite_mixed_precision_quantization_config_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = ...` instead."
+        )
+        self._option.paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = (
+            mixed_precision_quantization_config_path
+        )
+
+    def set_paddle_mkldnn(self, use_mkldnn=True):
+        """Enable/Disable MKLDNN while using the Paddle Inference backend, MKLDNN is enabled by default."""
+        logging.warning(
+            "`RuntimeOption.set_paddle_mkldnn` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.enable_mkldnn = True` instead."
+        )
+        self._option.paddle_infer_option.enable_mkldnn = use_mkldnn
+
+    def set_openvino_device(self, name="CPU"):
+        """Set the device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1', etc.
+        This interface is deprecated, please use `RuntimeOption.openvino_option.set_device` instead.
+        """
+        logging.warning(
+            "`RuntimeOption.set_openvino_device` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_device` instead."
+        )
+        self._option.openvino_option.set_device(name)
+
+    def set_openvino_shape_info(self, shape_info):
+        """Set the shape information of the model's inputs, used on GPU to fix the shape.
+        This interface is deprecated, please use `RuntimeOption.openvino_option.set_shape_info` instead.
+ + :param shape_info: (dict{str, list of int})Shape information of model's inputs, e.g {"image": [1, 3, 640, 640], "scale_factor": [1, 2]} + """ + logging.warning( + "`RuntimeOption.set_openvino_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_shape_info` instead." + ) + self._option.openvino_option.set_shape_info(shape_info) + + def set_openvino_cpu_operators(self, operators): + """While using OpenVINO backend and intel GPU, this interface specifies unsupported operators to run on CPU + This interface is deprecated, please use `RuntimeOption.openvino_option.set_cpu_operators` instead. + + :param operators: (list of string)list of operators' name, e.g ["MulticlasNms"] + """ + logging.warning( + "`RuntimeOption.set_openvino_cpu_operators` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_cpu_operators` instead." + ) + self._option.openvino_option.set_cpu_operators(operators) + + def enable_paddle_log_info(self): + """Enable print out the debug log information while using Paddle Inference backend, the log information is disabled by default.""" + logging.warning( + "RuntimeOption.enable_paddle_log_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.enable_log_info = True` instead." + ) + self._option.paddle_infer_option.enable_log_info = True + + def disable_paddle_log_info(self): + """Disable print out the debug log information while using Paddle Inference backend, the log information is disabled by default.""" + logging.warning( + "RuntimeOption.disable_paddle_log_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.enable_log_info = False` instead." + ) + self._option.paddle_infer_option.enable_log_info = False + + def set_paddle_mkldnn_cache_size(self, cache_size): + """Set size of shape cache while using Paddle Inference backend with MKLDNN enabled, default will cache all the dynamic shape.""" + logging.warning( + "RuntimeOption.set_paddle_mkldnn_cache_size` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.mkldnn_cache_size = {}` instead.".format( + cache_size + ) + ) + self._option.paddle_infer_option.mkldnn_cache_size = cache_size + + def enable_lite_fp16(self): + """Enable half precision inference while using Paddle Lite backend on ARM CPU, fp16 is disabled by default.""" + logging.warning( + "`RuntimeOption.enable_lite_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.enable_fp16 = True` instead." + ) + self._option.paddle_lite_option.enable_fp16 = True + + def disable_lite_fp16(self): + """Disable half precision inference while using Paddle Lite backend on ARM CPU, fp16 is disabled by default.""" + logging.warning( + "`RuntimeOption.disable_lite_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.enable_fp16 = False` instead." + ) + self._option.paddle_lite_option.enable_fp16 = False + + def set_lite_power_mode(self, mode): + """Set POWER mode while using Paddle Lite backend on ARM CPU.""" + logging.warning( + "`RuntimeOption.set_lite_powermode` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.power_mode = {}` instead.".format( + mode + ) + ) + self._option.paddle_lite_option.power_mode = mode + + def set_trt_input_shape( + self, tensor_name, min_shape, opt_shape=None, max_shape=None + ): + """Set shape range information while using TensorRT backend with loadding a model contains dynamic input shape. 
When inference runs with a new input shape outside the set shape range, the TensorRT engine will be rebuilt to expand the shape range information.
+
+        :param tensor_name: (str)Name of the input which has a dynamic shape
+        :param min_shape: (list of int)Minimum shape of the input, e.g [1, 3, 224, 224]
+        :param opt_shape: (list of int)Optimal shape of the input, this is often set to the most common input shape; if set to None, it will be kept the same as min_shape
+        :param max_shape: (list of int)Maximum shape of the input, e.g [8, 3, 224, 224]; if set to None, it will be kept the same as min_shape
+        """
+        logging.warning(
+            "`RuntimeOption.set_trt_input_shape` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.set_shape()` instead."
+        )
+        if opt_shape is None and max_shape is None:
+            opt_shape = min_shape
+            max_shape = min_shape
+        else:
+            assert (
+                opt_shape is not None and max_shape is not None
+            ), "Set min_shape only, or set min_shape, opt_shape and max_shape together."
+        return self._option.trt_option.set_shape(
+            tensor_name, min_shape, opt_shape, max_shape
+        )
+
+    def set_trt_input_data(
+        self, tensor_name, min_input_data, opt_input_data=None, max_input_data=None
+    ):
+        """Set input data while using the TensorRT backend when loading a model that contains dynamic input shapes.
+
+        :param tensor_name: (str)Name of the input which has a dynamic shape
+        :param min_input_data: (list of int)Input data for the minimum shape of the input.
+        :param opt_input_data: (list of int)Input data for the optimal shape of the input; if set to None, it will be kept the same as min_input_data
+        :param max_input_data: (list of int)Input data for the maximum shape of the input; if set to None, it will be kept the same as min_input_data
+        """
+        logging.warning(
+            "`RuntimeOption.set_trt_input_data` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.set_input_data()` instead."
+        )
+        if opt_input_data is None and max_input_data is None:
+            opt_input_data = min_input_data
+            max_input_data = min_input_data
+        else:
+            assert (
+                opt_input_data is not None and max_input_data is not None
+            ), "Set min_input_data only, or set min_input_data, opt_input_data and max_input_data together."
+        return self._option.trt_option.set_input_data(
+            tensor_name, min_input_data, opt_input_data, max_input_data
+        )
+
+    def set_trt_cache_file(self, cache_file_path):
+        """Set a cache file path while using the TensorRT backend. While loading a Paddle/ONNX model with set_trt_cache_file("./tensorrt_cache/model.trt"), if the file `./tensorrt_cache/model.trt` exists, building the TensorRT engine is skipped and the cache file is loaded directly; if it doesn't exist, the TensorRT engine is built and serialized to the cache file as a binary string.
+
+        :param cache_file_path: (str)Path of the TensorRT cache file
+        """
+        logging.warning(
+            "`RuntimeOption.set_trt_cache_file` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.serialize_file = {}` instead.".format(
+                cache_file_path
+            )
+        )
+        self._option.trt_option.serialize_file = cache_file_path
+
+    def enable_trt_fp16(self):
+        """Enable half precision inference while using the TensorRT backend. Note that not all Nvidia GPUs support FP16; in those cases, inference will fall back to FP32."""
+        logging.warning(
+            "`RuntimeOption.enable_trt_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.enable_fp16 = True` instead."
+        )
+        self._option.trt_option.enable_fp16 = True
+
+    def disable_trt_fp16(self):
+        """Disable half precision inference while using the TensorRT backend."""
+        logging.warning(
+            "`RuntimeOption.disable_trt_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.enable_fp16 = False` instead."
+        )
+        self._option.trt_option.enable_fp16 = False
+
+    def enable_pinned_memory(self):
+        """Enable pinned memory. Pinned memory can be utilized to speed up data transfer between CPU and GPU. Currently it's only supported in the TensorRT backend and the Paddle Inference backend."""
+        return self._option.enable_pinned_memory()
+
+    def disable_pinned_memory(self):
+        """Disable pinned memory."""
+        return self._option.disable_pinned_memory()
+
+    def enable_paddle_to_trt(self):
+        """While using the TensorRT backend, enable_paddle_to_trt() switches to the Paddle Inference backend and uses its integrated TensorRT instead."""
+        logging.warning(
+            "`RuntimeOption.enable_paddle_to_trt` will be deprecated in v1.2.0, if you want to run TensorRT with the Paddle Inference backend, please use the following method:"
+        )
+        logging.warning(" ==============================================")
+        logging.warning(" import ultrainfer as fd")
+        logging.warning(" option = fd.RuntimeOption()")
+        logging.warning(" option.use_gpu(0)")
+        logging.warning(" option.use_paddle_infer_backend()")
+        logging.warning(" option.paddle_infer_option.enable_trt = True")
+        logging.warning(" ==============================================")
+        self._option.use_paddle_backend()
+        self._option.paddle_infer_option.enable_trt = True
+
+    def set_trt_max_workspace_size(self, trt_max_workspace_size):
+        """Set the max workspace size while using the TensorRT backend."""
+        logging.warning(
+            "`RuntimeOption.set_trt_max_workspace_size` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.max_workspace_size = {}` instead.".format(
+                trt_max_workspace_size
+            )
+        )
+        self._option.trt_option.max_workspace_size = trt_max_workspace_size
+
+    def set_trt_max_batch_size(self, trt_max_batch_size):
+        """Set the max batch size while using the TensorRT backend."""
+        logging.warning(
+            "`RuntimeOption.set_trt_max_batch_size` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.max_batch_size = {}` instead.".format(
+                trt_max_batch_size
+            )
+        )
+        self._option.trt_option.max_batch_size = trt_max_batch_size
+
+    def enable_paddle_trt_collect_shape(self):
+        """Enable collecting subgraph shape information while using Paddle Inference with TensorRT."""
+        logging.warning(
+            "`RuntimeOption.enable_paddle_trt_collect_shape` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.collect_trt_shape = True` instead."
+        )
+        self._option.paddle_infer_option.collect_trt_shape = True
+
+    def disable_paddle_trt_collect_shape(self):
+        """Disable collecting subgraph shape information while using Paddle Inference with TensorRT."""
+        logging.warning(
+            "`RuntimeOption.disable_paddle_trt_collect_shape` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.collect_trt_shape = False` instead."
+        )
+        self._option.paddle_infer_option.collect_trt_shape = False
+
+    def delete_paddle_backend_pass(self, pass_name):
+        """Delete a pass by name in the Paddle Inference backend."""
+        logging.warning(
+            "`RuntimeOption.delete_paddle_backend_pass` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.delete_pass` instead."
+ ) + self._option.paddle_infer_option.delete_pass(pass_name) + + def disable_paddle_trt_ops(self, ops): + """Disable some ops in paddle trt backend""" + logging.warning( + "`RuntimeOption.disable_paddle_trt_ops` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.disable_trt_ops()` instead." + ) + self._option.disable_trt_ops(ops) + + def use_ipu( + self, + device_num=1, + micro_batch_size=1, + enable_pipelining=False, + batches_per_step=1, + ): + return self._option.use_ipu( + device_num, micro_batch_size, enable_pipelining, batches_per_step + ) + + def set_ipu_config( + self, + enable_fp16=False, + replica_num=1, + available_memory_proportion=1.0, + enable_half_partial=False, + ): + logging.warning( + "`RuntimeOption.set_ipu_config` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.set_ipu_config()` instead." + ) + self._option.paddle_infer_option.set_ipu_config( + enable_fp16, replica_num, available_memory_proportion, enable_half_partial + ) + + @property + def poros_option(self): + """Get PorosBackendOption object to configure Poros backend + + :return PorosBackendOption + """ + return self._option.poros_option + + @property + def paddle_lite_option(self): + """Get LiteBackendOption object to configure Paddle Lite backend + + :return LiteBackendOption + """ + return self._option.paddle_lite_option + + @property + def openvino_option(self): + """Get OpenVINOOption object to configure OpenVINO backend + + :return OpenVINOOption + """ + return self._option.openvino_option + + @property + def ort_option(self): + """Get OrtBackendOption object to configure ONNX Runtime backend + + :return OrtBackendOption + """ + return self._option.ort_option + + @property + def trt_option(self): + """Get TrtBackendOption object to configure TensorRT backend + + :return TrtBackendOption + """ + return self._option.trt_option + + @property + def paddle_infer_option(self): + """Get PaddleBackendOption object to configure Paddle Inference backend + + :return PaddleBackendOption + """ + return self._option.paddle_infer_option + + def enable_profiling(self, inclue_h2d_d2h=False, repeat=100, warmup=50): + """Set the profile mode as 'true'. + :param inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime. + :param repeat Repeat times for runtime inference. + :param warmup Warmup times for runtime inference. + """ + return self._option.enable_profiling(inclue_h2d_d2h, repeat, warmup) + + def disable_profiling(self): + """Set the profile mode as 'false'.""" + return self._option.disable_profiling() + + def set_external_raw_stream(self, cuda_stream): + """Set the external raw stream used by ultrainfer runtime.""" + self._option.set_external_raw_stream(cuda_stream) + + def __repr__(self): + attrs = dir(self._option) + message = "RuntimeOption(\n" + for attr in attrs: + if attr.startswith("__"): + continue + if hasattr(getattr(self._option, attr), "__call__"): + continue + message += " {} : {}\t\n".format(attr, getattr(self._option, attr)) + message.strip("\n") + message += ")" + return message diff --git a/libs/ultrainfer/python/ultrainfer/text/__init__.py b/libs/ultrainfer/python/ultrainfer/text/__init__.py new file mode 100755 index 0000000000..a21623f7e9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/text/__init__.py @@ -0,0 +1,18 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from . import uie +from .uie import UIEModel +from .uie import SchemaLanguage diff --git a/libs/ultrainfer/python/ultrainfer/text/uie/__init__.py b/libs/ultrainfer/python/ultrainfer/text/uie/__init__.py new file mode 100755 index 0000000000..c3554f763c --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/text/uie/__init__.py @@ -0,0 +1,105 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import logging +from ... import RuntimeOption, UltraInferModel, ModelFormat +from ... import c_lib_wrap as C + + +class SchemaLanguage(object): + ZH = 0 + EN = 1 + + +class SchemaNode(object): + def __init__(self, name, children=[]): + schema_node_children = [] + if isinstance(children, str): + children = [children] + for child in children: + if isinstance(child, str): + schema_node_children += [C.text.SchemaNode(child, [])] + elif isinstance(child, dict): + for key, val in child.items(): + schema_node_child = SchemaNode(key, val) + schema_node_children += [schema_node_child._schema_node] + else: + assert "The type of child of SchemaNode should be str or dict." + self._schema_node = C.text.SchemaNode(name, schema_node_children) + self._schema_node_children = schema_node_children + + +class UIEModel(UltraInferModel): + def __init__( + self, + model_file, + params_file, + vocab_file, + position_prob=0.5, + max_length=128, + schema=[], + batch_size=64, + runtime_option=RuntimeOption(), + model_format=ModelFormat.PADDLE, + schema_language=SchemaLanguage.ZH, + ): + if isinstance(schema, list): + schema = SchemaNode("", schema)._schema_node_children + elif isinstance(schema, dict): + schema_tmp = [] + for key, val in schema.items(): + schema_tmp += [SchemaNode(key, val)._schema_node] + schema = schema_tmp + else: + assert "The type of schema should be list or dict." + schema_language = C.text.SchemaLanguage(schema_language) + self._model = C.text.UIEModel( + model_file, + params_file, + vocab_file, + position_prob, + max_length, + schema, + batch_size, + runtime_option._option, + model_format, + schema_language, + ) + assert self.initialized, "UIEModel initialize failed." 
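[Editor's note] A minimal usage sketch of the UIEModel constructor above, for reviewers. The model, params, and vocab file paths are placeholders (no such files ship with this patch), and only calls defined in this module (the constructor, set_schema, and predict) are used; a flat list schema extracts independent targets, while a dict schema describes nested targets.

    import ultrainfer as ui
    from ultrainfer.text import UIEModel, SchemaLanguage

    option = ui.RuntimeOption()
    option.use_cpu()

    model = UIEModel(
        "uie-base/inference.pdmodel",    # placeholder model file
        "uie-base/inference.pdiparams",  # placeholder params file
        "uie-base/vocab.txt",            # placeholder vocab file
        schema=["time", "location", "person"],  # flat schema: independent entity types
        runtime_option=option,
        schema_language=SchemaLanguage.EN,
    )

    # A dict schema extracts children under a parent entity.
    model.set_schema({"competition": ["winner", "score"]})

    # return_dict=True converts the native results into plain Python dicts.
    results = model.predict(["The final was won 3-1 by the visiting team."], return_dict=True)
    print(results)
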
+ + def set_schema(self, schema): + if isinstance(schema, list): + schema = SchemaNode("", schema)._schema_node_children + elif isinstance(schema, dict): + schema_tmp = [] + for key, val in schema.items(): + schema_tmp += [SchemaNode(key, val)._schema_node] + schema = schema_tmp + self._model.set_schema(schema) + + def predict(self, texts, return_dict=False): + results = self._model.predict(texts) + if not return_dict: + return results + new_results = [] + for result in results: + uie_result = dict() + for key, uie_results in result.items(): + uie_result[key] = list() + for uie_res in uie_results: + uie_result[key].append(uie_res.get_dict()) + new_results += [uie_result] + return new_results diff --git a/libs/ultrainfer/python/ultrainfer/ts/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/__init__.py new file mode 100755 index 0000000000..2128c729e5 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/__init__.py @@ -0,0 +1,18 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from . import anomalydetection +from . import classification +from . import forecasting diff --git a/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/__init__.py new file mode 100755 index 0000000000..a4e90a611b --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .ppts import * diff --git a/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/ppts/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/ppts/__init__.py new file mode 100755 index 0000000000..ca7938f864 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/anomalydetection/ppts/__init__.py @@ -0,0 +1,168 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import os +from copy import deepcopy +import numpy as np +import pandas as pd +from typing import List +from dataclasses import dataclass + +from .... import UltraInferModel, ModelFormat +from ....py_only.ts import PyOnlyTSModel +from ....utils.misc import load_config +from ....py_only import PyOnlyProcessorChain +from ....py_only.ts import PyOnlyTSModel, processors as P + + +class PyOnlyAnomalyDetectionModel(PyOnlyTSModel): + def __init__( + self, + model_file, + params_file, + config_file, + scaler_file=None, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + self._model_file = model_file + self._params_file = params_file + self._model_format = model_format + super().__init__(runtime_option) + if scaler_file is None: + config_dir = os.path.dirname(config_file) + scaler_file = os.path.join(config_dir, "scaler.pkl") + self._config = load_config(config_file) + self._preprocessor = _PyOnlyAnomalyDetectionPreprocessor( + self._config, scaler_file + ) + self._postprocessor = _PyOnlyAnomalyDetectionPostprocessor(self._config) + + def model_name(): + return "PyOnlyAnomalyDetectionModel" + + def batch_predict(self, ts_list): + data_list = [] + for csv_data in ts_list: + data = {"ori_ts": deepcopy(csv_data), "ts": csv_data} + data = self._preprocessor.run(data) + data_list.append(data) + + input_data = {} + input_num = self._runtime.num_inputs() + for idx in range(input_num): + input_name = self._runtime.get_input_info(idx).name + ts_data = np.stack( + [data["ts"][idx] for data in data_list], axis=0, dtype=np.float32 + ) + ts_data = np.ascontiguousarray(ts_data) + input_data[input_name] = ts_data + + output_arrs = self._runtime.infer(input_data) + + results = [] + for idx, data in enumerate(output_arrs[0]): + data = {"ori_ts": data_list[idx]["ori_ts"], "pred": data} + result = self._postprocessor.run(data) + results.append(result) + return results + + def _update_option(self): + self._option.set_model_path( + self._model_file, self._params_file, self._model_format + ) + + +class _PyOnlyAnomalyDetectionPreprocessor(object): + def __init__(self, config, scaler_file): + super().__init__() + self.scaler_file = scaler_file + processors = self._build_processors(config) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + return self._processor_chain(data) + + def _build_processors(self, config): + processors = [] + processors.append(P.CutOff(config["size"])) + + if config.get("scale", None): + if not os.path.exists(self.scaler_file): + raise Exception(f"Cannot find scaler file: {self.scaler_file}") + processors.append(P.Normalize(self.scaler_file, config["info_params"])) + + processors.append(P.BuildTSDataset(config["info_params"])) + + if config.get("time_feat", None): + processors.append( + P.CalcTimeFeatures( + config["info_params"], + config["size"], + config["holiday"], + ) + ) + + processors.append(P.DataFrame2Arrays(config["input_data"])) + return processors + + +class _PyOnlyAnomalyDetectionPostprocessor(object): + def __init__(self, config): + super().__init__() + self.model_threshold = config["model_threshold"] + self.info_params = config["info_params"] + + def run(self, data): + ori_ts = data["ori_ts"] + pred = data["pred"] + if ori_ts.get("past_target", None) is not None: + ts = ori_ts["past_target"] + elif ori_ts.get("observed_cov_numeric", None) is not None: + ts = ori_ts["observed_cov_numeric"] 
+ elif ori_ts.get("known_cov_numeric", None) is not None: + ts = ori_ts["known_cov_numeric"] + elif ori_ts.get("static_cov_numeric", None) is not None: + ts = ori_ts["static_cov_numeric"] + else: + raise ValueError("No value in ori_ts") + column_name = ( + self.info_params["target_cols"] + if "target_cols" in self.info_params + else self.info_params["feature_cols"] + ) + + anomaly_score = np.mean(np.square(pred - np.array(ts)), axis=-1) + anomaly_label = (anomaly_score >= self.model_threshold) + 0 + + past_target_index = ts.index + past_target_index.name = self.info_params["time_col"] + + label = anomaly_label.tolist() + dates = past_target_index.tolist() + col_names = ["label"] + data = [label] + result = _PyOnlyAnomalyDetectionResult( + dates=dates, col_names=col_names, data=data + ) + return result + + +@dataclass +class _PyOnlyAnomalyDetectionResult(object): + dates: List[int] + col_names: List[str] + data: List[List[int]] diff --git a/libs/ultrainfer/python/ultrainfer/ts/classification/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/classification/__init__.py new file mode 100755 index 0000000000..a4e90a611b --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/classification/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .ppts import * diff --git a/libs/ultrainfer/python/ultrainfer/ts/classification/ppts/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/classification/ppts/__init__.py new file mode 100755 index 0000000000..8fb4cc5778 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/classification/ppts/__init__.py @@ -0,0 +1,128 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import os +from copy import deepcopy +import numpy as np +from dataclasses import dataclass + +from .... 
import ModelFormat +from ....py_only.ts import PyOnlyTSModel +from ....utils.misc import load_config +from ....py_only import PyOnlyProcessorChain +from ....py_only.ts import PyOnlyTSModel, processors as P + + +class PyOnlyClassificationModel(PyOnlyTSModel): + def __init__( + self, + model_file, + params_file, + config_file, + scaler_file=None, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + self._model_file = model_file + self._params_file = params_file + self._model_format = model_format + super().__init__(runtime_option) + if scaler_file is None: + config_dir = os.path.dirname(config_file) + scaler_file = os.path.join(config_dir, "scaler.pkl") + self._config = load_config(config_file) + self._preprocessor = _PyOnlyClassificationPreprocessor( + self._config, scaler_file + ) + self._postprocessor = _PyOnlyClassificationPostprocessor() + + def model_name(): + return "PyOnlyClassificationModel" + + def batch_predict(self, ts_list): + data_list = [] + for csv_data in ts_list: + data = {"ori_ts": deepcopy(csv_data), "ts": csv_data} + data = self._preprocessor.run(data) + data_list.append(data) + + input_data = {} + input_num = self._runtime.num_inputs() + for idx in range(input_num): + input_name = self._runtime.get_input_info(idx).name + ts_data = np.stack( + [data["ts"][idx] for data in data_list], axis=0, dtype=np.float32 + ) + ts_data = np.ascontiguousarray(ts_data) + input_data[input_name] = ts_data + + output_arrs = self._runtime.infer(input_data) + + results = [] + for data in output_arrs[0]: + data = {"pred": data} + result = self._postprocessor.run(data) + results.append(result) + return results + + def _update_option(self): + self._option.set_model_path( + self._model_file, self._params_file, self._model_format + ) + + +class _PyOnlyClassificationPreprocessor(object): + def __init__(self, config, scaler_file): + super().__init__() + self.scaler_file = scaler_file + processors = self._build_processors(config) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + return self._processor_chain(data) + + def _build_processors(self, config): + processors = [] + + if config.get("scale", None): + if not os.path.exists(self.scaler_file): + raise Exception(f"Cannot find scaler file: {self.scaler_file}") + processors.append(P.Normalize(self.scaler_file, config["info_params"])) + + processors.append(P.BuildTSDataset(config["info_params"])) + processors.append(P.BuildPaddedMask(config["input_data"])) + processors.append(P.DataFrame2Arrays(config["input_data"])) + return processors + + +class _PyOnlyClassificationPostprocessor(object): + def __init__(self): + super().__init__() + + def run(self, data): + pred_ts = data["pred"] + pred_ts -= np.max(pred_ts, axis=-1, keepdims=True) + pred_ts = np.exp(pred_ts) / np.sum(np.exp(pred_ts), axis=-1, keepdims=True) + class_id = np.argmax(pred_ts, axis=-1) + pred_score = pred_ts[class_id] + result = _PyOnlyClassificationResult(class_id=class_id, score=pred_score) + return result + + +@dataclass +class _PyOnlyClassificationResult(object): + class_id: int + score: float diff --git a/libs/ultrainfer/python/ultrainfer/ts/forecasting/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/forecasting/__init__.py new file mode 100755 index 0000000000..a4e90a611b --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/forecasting/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .ppts import * diff --git a/libs/ultrainfer/python/ultrainfer/ts/forecasting/ppts/__init__.py b/libs/ultrainfer/python/ultrainfer/ts/forecasting/ppts/__init__.py new file mode 100755 index 0000000000..56a5634163 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/ts/forecasting/ppts/__init__.py @@ -0,0 +1,195 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import os +from copy import deepcopy +import numpy as np +import pandas as pd +from typing import List +from dataclasses import dataclass + +from .... import UltraInferModel, ModelFormat +from ....py_only.ts import PyOnlyTSModel +from ....utils.misc import load_config +from ....py_only import PyOnlyProcessorChain +from ....py_only.ts import PyOnlyTSModel, processors as P + + +class PyOnlyForecastingModel(PyOnlyTSModel): + def __init__( + self, + model_file, + params_file, + config_file, + scaler_file=None, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + self._model_file = model_file + self._params_file = params_file + self._model_format = model_format + super().__init__(runtime_option) + if scaler_file is None: + config_dir = os.path.dirname(config_file) + scaler_file = os.path.join(config_dir, "scaler.pkl") + self._config = load_config(config_file) + self._preprocessor = _PyOnlyForecastingPreprocessor(self._config, scaler_file) + self._postprocessor = _PyOnlyForecastingPostprocessor(self._config, scaler_file) + + def model_name(): + return "PyOnlyForecastingModel" + + def batch_predict(self, ts_list): + data_list = [] + for csv_data in ts_list: + data = {"ori_ts": deepcopy(csv_data), "ts": csv_data} + data = self._preprocessor.run(data) + data_list.append(data) + + input_data = {} + input_num = self._runtime.num_inputs() + for idx in range(input_num): + input_name = self._runtime.get_input_info(idx).name + ts_data = np.stack( + [data["ts"][idx] for data in data_list], axis=0, dtype=np.float32 + ) + ts_data = np.ascontiguousarray(ts_data) + input_data[input_name] = ts_data + + output_arrs = self._runtime.infer(input_data) + + results = [] + for idx, data in enumerate(output_arrs[0]): + data = {"ori_ts": data_list[idx]["ori_ts"], "pred": data} + result = self._postprocessor.run(data) + results.append(result) + return results + + def _update_option(self): + self._option.set_model_path( + self._model_file, self._params_file, 
self._model_format + ) + + +class _PyOnlyForecastingPreprocessor(object): + def __init__(self, config, scaler_file): + super().__init__() + self.scaler_file = scaler_file + processors = self._build_processors(config) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + return self._processor_chain(data) + + def _build_processors(self, config): + processors = [] + processors.append(P.CutOff(config["size"])) + + if config.get("scale", None): + if not os.path.exists(self.scaler_file): + raise Exception(f"Cannot find scaler file: {self.scaler_file}") + processors.append(P.Normalize(self.scaler_file, config["info_params"])) + + processors.append(P.BuildTSDataset(config["info_params"])) + + if config.get("time_feat", None): + processors.append( + P.CalcTimeFeatures( + config["info_params"], + config["size"], + config["holiday"], + ) + ) + + processors.append(P.DataFrame2Arrays(config["input_data"])) + return processors + + +class _PyOnlyForecastingPostprocessor(object): + def __init__(self, config, scaler_file): + super().__init__() + self.scaler_file = scaler_file + self.info_params = config["info_params"] + processors = self._build_processors(config) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + ori_ts = data["ori_ts"] + pred = data["pred"] + if ori_ts.get("past_target", None) is not None: + ts = ori_ts["past_target"] + elif ori_ts.get("observed_cov_numeric", None) is not None: + ts = ori_ts["observed_cov_numeric"] + elif ori_ts.get("known_cov_numeric", None) is not None: + ts = ori_ts["known_cov_numeric"] + elif ori_ts.get("static_cov_numeric", None) is not None: + ts = ori_ts["static_cov_numeric"] + else: + raise ValueError("No value in ori_ts") + + column_name = ( + self.info_params["target_cols"] + if "target_cols" in self.info_params + else self.info_params["feature_cols"] + ) + if isinstance(self.info_params["freq"], str): + past_target_index = ts.index + if past_target_index.freq is None: + past_target_index.freq = pd.infer_freq(ts.index) + future_target_index = pd.date_range( + past_target_index[-1] + past_target_index.freq, + periods=pred.shape[0], + freq=self.info_params["freq"], + name=self.info_params["time_col"], + ) + elif isinstance(self.info_params["freq"], int): + start_idx = max(ts.index) + 1 + stop_idx = start_idx + pred.shape[0] + future_target_index = pd.RangeIndex( + start=start_idx, + stop=stop_idx, + step=self.info_params["freq"], + name=self.info_params["time_col"], + ) + + future_target = pd.DataFrame( + np.reshape(pred, newshape=[pred.shape[0], -1]), + index=future_target_index, + columns=column_name, + ) + data = {"pred": future_target} + forecast_dataframe = self._processor_chain(data) + forecast = forecast_dataframe["pred"] + col_names = forecast.columns.tolist() + data = [forecast[col_name].tolist() for col_name in col_names] + dates = [int(i.timestamp()) for i in forecast.index] + result = _PyOnlyForecastingResult(dates=dates, col_names=col_names, data=data) + return result + + def _build_processors(self, config): + processors = [] + if config.get("scale", None): + if not os.path.exists(self.scaler_file): + raise Exception(f"Cannot find scaler file: {self.scaler_file}") + processors.append(P.Denormalize(self.scaler_file, config["info_params"])) + return processors + + +@dataclass +class _PyOnlyForecastingResult(object): + dates: List[int] + col_names: List[str] + data: List[List[float]] diff --git a/libs/ultrainfer/python/ultrainfer/utils/__init__.py 
b/libs/ultrainfer/python/ultrainfer/utils/__init__.py new file mode 100755 index 0000000000..2379939471 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/__init__.py @@ -0,0 +1,14 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .example_resource import get_detection_test_image diff --git a/libs/ultrainfer/python/ultrainfer/utils/example_resource.py b/libs/ultrainfer/python/ultrainfer/utils/example_resource.py new file mode 100755 index 0000000000..dd41fc7664 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/example_resource.py @@ -0,0 +1,26 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import download +from . import hub_env + + +def get_detection_test_image(path=None): + if path is None: + path = hub_env.RESOURCE_HOME + fullpath = download( + url="https://bj.bcebos.com/paddlehub/fastdeploy/example/detection_test_image.jpg", + path=path, + ) + return fullpath diff --git a/libs/ultrainfer/python/ultrainfer/utils/hub_config.py b/libs/ultrainfer/python/ultrainfer/utils/hub_config.py new file mode 100755 index 0000000000..3e3d4a5d23 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/hub_config.py @@ -0,0 +1,76 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +import os +import time +import json +import uuid +import yaml + +from . import hub_env as hubenv + + +class HubConfig: + """ + UltraInfer model management configuration class. + """ + + def __init__(self): + self._initialize() + self.file = os.path.join(hubenv.CONF_HOME, "config.yaml") + + if not os.path.exists(self.file): + self.flush() + return + + with open(self.file, "r") as file: + try: + cfg = yaml.load(file, Loader=yaml.FullLoader) + self.data.update(cfg) + except: + ... + + def _initialize(self): + # Set default configuration values. 
+ self.data = {} + self.data["server"] = "http://paddlepaddle.org.cn/paddlehub" + + def reset(self): + """Reset configuration to default.""" + self._initialize() + self.flush() + + @property + def server(self): + """Model server url.""" + return self.data["server"] + + @server.setter + def server(self, url: str): + self.data["server"] = url + self.flush() + + def flush(self): + """Flush the current configuration into the configuration file.""" + with open(self.file, "w") as file: + cfg = json.loads(json.dumps(self.data)) + yaml.dump(cfg, file) + + def __str__(self): + cfg = json.loads(json.dumps(self.data)) + return yaml.dump(cfg) + + +config = HubConfig() diff --git a/libs/ultrainfer/python/ultrainfer/utils/hub_env.py b/libs/ultrainfer/python/ultrainfer/utils/hub_env.py new file mode 100755 index 0000000000..10eb2bbc18 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/hub_env.py @@ -0,0 +1,57 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This module is used to store environmental variables for ultrainfer model hub. + +ULTRAINFER_HUB_HOME --> the root directory for storing ultrainfer model hub related data. Default to ~/.ultrainfer. Users can change the +├ default value through the ULTRAINFER_HUB_HOME environment variable. +├── MODEL_HOME --> Store the downloaded ultrainfer models. +├── CONF_HOME --> Store the default configuration files. +""" + +import os + + +def _get_user_home(): + return os.path.expanduser("~") + + +def _get_hub_home(): + if "ULTRAINFER_HUB_HOME" in os.environ: + home_path = os.environ["ULTRAINFER_HUB_HOME"] + if os.path.exists(home_path): + if os.path.isdir(home_path): + return home_path + else: + raise RuntimeError( + "The environment variable ULTRAINFER_HUB_HOME {} is not a directory.".format( + home_path + ) + ) + else: + return home_path + return os.path.join(_get_user_home(), ".ultrainfer") + + +def _get_sub_home(directory): + home = os.path.join(_get_hub_home(), directory) + os.makedirs(home, exist_ok=True) + return home + + +USER_HOME = _get_user_home() +HUB_HOME = _get_hub_home() +MODEL_HOME = _get_sub_home("models") +CONF_HOME = _get_sub_home("conf") +RESOURCE_HOME = _get_sub_home("resources") diff --git a/libs/ultrainfer/python/ultrainfer/utils/hub_model_server.py b/libs/ultrainfer/python/ultrainfer/utils/hub_model_server.py new file mode 100755 index 0000000000..3fd05e0c51 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/hub_model_server.py @@ -0,0 +1,134 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import requests +from typing import List + +from .hub_config import config + + +class ServerConnectionError(Exception): + def __init__(self, url: str): + self.url = url + + def __str__(self): + tips = "Can't connect to UltraInfer Model Server: {}".format(self.url) + return tips + + +class ModelServer(object): + """ + UltraInfer server source + + Args: + url(str) : Url of the server + timeout(int) : Request timeout + """ + + def __init__(self, url: str, timeout: int = 10): + self._url = url + self._timeout = timeout + + def search_model( + self, name: str, format: str = None, version: str = None + ) -> List[dict]: + """ + Search model from model server. + + Args: + name(str) : UltraInfer model name + format(str): UltraInfer model format + version(str) : UltraInfer model version + Return: + result(list): search results + """ + params = {} + params["name"] = name + if format: + params["format"] = format + if version: + params["version"] = version + result = self.request(path="ultrainfer_search", params=params) + if result["status"] == 0 and len(result["data"]) > 0: + return result["data"] + return None + + def stat_model(self, name: str, format: str, version: str): + """ + Note a record when download a model for statistics. + + Args: + name(str) : UltraInfer model name + format(str): UltraInfer model format + version(str) : UltraInfer model version + Return: + is_successful(bool): True if successful, False otherwise + """ + params = {} + params["name"] = name + params["format"] = format + params["version"] = version + params["from"] = "ultrainfer" + try: + result = self.request(path="stat", params=params) + except Exception: + return False + if result["status"] == 0: + return True + else: + return False + + def request(self, path: str, params: dict) -> dict: + """Request server.""" + api = "{}/{}".format(self._url, path) + try: + result = requests.get(api, params, timeout=self._timeout) + return result.json() + except requests.exceptions.ConnectionError as e: + raise ServerConnectionError(self._url) + + def get_model_list(self): + """ + Get all pre-trained models information in dataset. + Return: + result(dict): key is category name, value is a list which contains models \ + information such as name, format and version. + """ + api = "{}/{}".format(self._url, "ultrainfer_listmodels") + try: + result = requests.get(api, timeout=self._timeout) + return result.json() + except requests.exceptions.ConnectionError as e: + raise ServerConnectionError(self._url) + + def is_connected(self): + return self.check(self._url) + + @classmethod + def check(cls, url: str) -> bool: + """ + Check if the specified url is a valid model server + + Args: + url(str) : Url to check + """ + try: + r = requests.get(url + "/search") + return r.status_code == 200 + except: + return False + + +model_server = ModelServer(config.server) diff --git a/libs/ultrainfer/python/ultrainfer/utils/misc.py b/libs/ultrainfer/python/ultrainfer/utils/misc.py new file mode 100755 index 0000000000..bac49d9dc0 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/utils/misc.py @@ -0,0 +1,20 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import yaml + + +def load_config(config_path): + with open(config_path, "r", encoding="utf-8") as f: + return yaml.safe_load(f) diff --git a/libs/ultrainfer/python/ultrainfer/vision/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/__init__.py new file mode 100755 index 0000000000..f63607950c --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/__init__.py @@ -0,0 +1,41 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from . import detection +from . import classification +from . import segmentation +from . import tracking +from . import keypointdetection +from . import matting +from . import facedet +from . import facealign +from . import faceid +from . import ocr +from . import headpose +from . import sr +from . import evaluation +from . import generation +from . import perception +from .utils import fd_result_to_json +from .visualize import * +from .. import C + + +def enable_flycv(): + return C.vision.enable_flycv() + + +def disable_flycv(): + return C.vision.disable_flycv() diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/classification/__init__.py new file mode 100755 index 0000000000..af2b7f000d --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/__init__.py @@ -0,0 +1,36 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import + +from .contrib.yolov5cls import YOLOv5Cls +from .ppcls import * +from .ppshitu import PPShiTuV2Detector +from .ppshitu import PPShiTuV2Recognizer +from .ppshitu import PPShiTuV2RecognizerPreprocessor +from .ppshitu import PPShiTuV2RecognizerPostprocessor +from .contrib.resnet import ResNet + +PPLCNet = PaddleClasModel +PPLCNetv2 = PaddleClasModel +EfficientNet = PaddleClasModel +GhostNet = PaddleClasModel +MobileNetv1 = PaddleClasModel +MobileNetv2 = PaddleClasModel +MobileNetv3 = PaddleClasModel +ShuffleNetv2 = PaddleClasModel +SqueezeNet = PaddleClasModel +Inceptionv3 = PaddleClasModel +PPHGNet = PaddleClasModel +ResNet50vd = PaddleClasModel +SwinTransformer = PaddleClasModel diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/resnet.py b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/resnet.py new file mode 100755 index 0000000000..487e92d1d7 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/resnet.py @@ -0,0 +1,104 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class ResNet(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a image classification model exported by torchvision.ResNet. 
+ + :param model_file: (str)Path of model file, e.g resnet/resnet50.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + + # call super() to initialize the backend_option + # the result of initialization will be saved in self._runtime_option + super(ResNet, self).__init__(runtime_option) + + self._model = C.vision.classification.ResNet( + model_file, params_file, self._runtime_option, model_format + ) + # self.initialized shows the initialization of the model is successful or not + + assert self.initialized, "ResNet initialize failed." + + # Predict and return the inference result of "input_image". + def predict(self, input_image, topk=1): + """Classify an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param topk: (int)The topk result by the classify confidence score, default 1 + :return: ClassifyResult + """ + return self._model.predict(input_image, topk) + + # Implement the setter and getter method for variables + @property + def size(self): + """ + Returns the preprocess image size, default size = [224, 224]; + """ + return self._model.size + + @property + def mean_vals(self): + """ + Returns the mean value of normlization, default mean_vals = [0.485f, 0.456f, 0.406f]; + """ + return self._model.mean_vals + + @property + def std_vals(self): + """ + Returns the std value of normlization, default std_vals = [0.229f, 0.224f, 0.225f]; + """ + return self._model.std_vals + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @mean_vals.setter + def mean_vals(self, value): + assert isinstance( + value, list + ), "The value to set `mean_vals` must be type of list." + self._model.mean_vals = value + + @std_vals.setter + def std_vals(self, value): + assert isinstance( + value, list + ), "The value to set `std_vals` must be type of list." + self._model.std_vals = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/yolov5cls.py b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/yolov5cls.py new file mode 100755 index 0000000000..522d8a5428 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/contrib/yolov5cls.py @@ -0,0 +1,140 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class YOLOv5ClsPreprocessor: + def __init__(self): + """Create a preprocessor for YOLOv5Cls""" + self._preprocessor = C.vision.classification.YOLOv5ClsPreprocessor() + + def run(self, input_ims): + """Preprocess input images for YOLOv5Cls + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [224, 224] + """ + return self._preprocessor.size + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + +class YOLOv5ClsPostprocessor: + def __init__(self): + """Create a postprocessor for YOLOv5Cls""" + self._postprocessor = C.vision.classification.YOLOv5ClsPostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv5Cls + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of ClassifyResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def topk(self): + """ + topk for postprocessing, default is 1 + """ + return self._postprocessor.topk + + @topk.setter + def topk(self, topk): + assert isinstance(topk, int), "The value to set `top k` must be type of int." + self._postprocessor.topk = topk + + +class YOLOv5Cls(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv5Cls model exported by YOLOv5Cls. + + :param model_file: (str)Path of model file, e.g ./YOLOv5Cls.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(YOLOv5Cls, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "YOLOv5Cls only support model format of ModelFormat.ONNX now." + self._model = C.vision.classification.YOLOv5Cls( + model_file, params_file, self._runtime_option, model_format + ) + + assert self.initialized, "YOLOv5Cls initialize failed." + + def predict(self, input_image): + """Classify an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: ClassifyResult + """ + assert input_image is not None, "Input image is None." 
+ return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of ClassifyResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get YOLOv5ClsPreprocessor object of the loaded model + + :return YOLOv5ClsPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get YOLOv5ClsPostprocessor object of the loaded model + + :return YOLOv5ClsPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/ppcls/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/classification/ppcls/__init__.py new file mode 100755 index 0000000000..b19dc5908a --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/ppcls/__init__.py @@ -0,0 +1,288 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import logging +from dataclasses import dataclass +from typing import List + +import numpy as np + +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C +from ...common import ProcessorManager +from ....py_only import PyOnlyProcessorChain +from ....py_only.vision import PyOnlyVisionModel, processors as P +from ....utils.misc import load_config + + +class PaddleClasPreprocessor(ProcessorManager): + def __init__(self, config_file): + """Create a preprocessor for PaddleClasModel from configuration file + + :param config_file: (str)Path of configuration file, e.g resnet50/inference_cls.yaml + """ + super(PaddleClasPreprocessor, self).__init__() + self._manager = C.vision.classification.PaddleClasPreprocessor(config_file) + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._manager.disable_permute() + + def initial_resize_on_cpu(self, v): + """ + When the initial operator is Resize, and input image size is large, + maybe it's better to run resize on CPU, because the HostToDevice memcpy + is time consuming. Set this True to run the initial resize on CPU. 
+        :param: v: True or False
+        """
+        self._manager.initial_resize_on_cpu(v)
+
+
+class PaddleClasPostprocessor:
+    def __init__(self, topk=1):
+        """Create a postprocessor for PaddleClasModel
+
+        :param topk: (int)Keep only the top k classification labels
+        """
+        self._postprocessor = C.vision.classification.PaddleClasPostprocessor(topk)
+
+    def run(self, runtime_results):
+        """Postprocess the runtime results for PaddleClasModel
+
+        :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime
+        :return: list of ClassifyResult (if the runtime_results were predicted from batched samples, the length of this list equals the batch size)
+        """
+        return self._postprocessor.run(runtime_results)
+
+
+class PaddleClasModel(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file,
+        config_file,
+        runtime_option=None,
+        model_format=ModelFormat.PADDLE,
+    ):
+        """Load an image classification model exported by PaddleClas.
+
+        :param model_file: (str)Path of model file, e.g. resnet50/inference.pdmodel
+        :param params_file: (str)Path of parameters file, e.g. resnet50/inference.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param config_file: (str)Path of the deployment configuration file, e.g. resnet50/inference_cls.yaml
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inferring this model; if it's None, the default backend on CPU will be used
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+
+        super(PaddleClasModel, self).__init__(runtime_option)
+        self._model = C.vision.classification.PaddleClasModel(
+            model_file, params_file, config_file, self._runtime_option, model_format
+        )
+        assert self.initialized, "PaddleClas model initialize failed."
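+
+    # Illustrative usage sketch (not part of the tested code path): the model/config
+    # paths, the test image, and the use of OpenCV for loading are assumptions.
+    #
+    #   import cv2
+    #   from ultrainfer.vision.classification import PaddleClasModel
+    #
+    #   model = PaddleClasModel(
+    #       "resnet50/inference.pdmodel",
+    #       "resnet50/inference.pdiparams",
+    #       "resnet50/inference_cls.yaml",
+    #   )
+    #   im = cv2.imread("test.jpg")  # any HWC, BGR numpy.ndarray works
+    #   result = model.predict(im, topk=5)
+    #   print(result)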
+ + def clone(self): + """Clone PaddleClasModel object + + :return: a new PaddleClasModel object + """ + + class PaddleClasCloneModel(PaddleClasModel): + def __init__(self, model): + self._model = model + + clone_model = PaddleClasCloneModel(self._model.clone()) + return clone_model + + def predict(self, im, topk=1): + """Classify an input image + + :param im: (numpy.ndarray) The input image data, a 3-D array with layout HWC, BGR format + :param topk: (int) Filter the topk classify result, default 1 + :return: ClassifyResult + """ + + self.postprocessor.topk = topk + return self._model.predict(im) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of ClassifyResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get PaddleClasPreprocessor object of the loaded model + + :return PaddleClasPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PaddleClasPostprocessor object of the loaded model + + :return PaddleClasPostprocessor + """ + return self._model.postprocessor + + +class _PyOnlyMultilabelClassificationPreprocessor(object): + def __init__(self, config): + super().__init__() + processors = self._build_processors(config) + processors.insert(0, P.BGR2RGB()) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + return self._processor_chain(data) + + def _build_processors(self, config): + processors = [] + for item in config: + tf_type = next(iter(item)) + args = item[tf_type] + if tf_type == "ResizeImage": + if args.keys() - {"resize_short", "size", "backend", "interpolation"}: + raise ValueError + args.setdefault("resize_short", None) + args.setdefault("size", None) + # TODO: `backend` & `interpolation` + if not (args["resize_short"] or args["size"]): + raise ValueError + if args.get("resize_short"): + processor = P.ResizeByShort( + target_short_edge=args["resize_short"], + size_divisor=None, + interp="LINEAR", + ) + else: + processor = P.Resize(target_size=args["size"]) + elif tf_type == "CropImage": + if args.keys() - {"size"}: + raise ValueError + args.setdefault("size", 224) + processor = P.Crop(crop_size=args["size"]) + elif tf_type == "NormalizeImage": + if args.keys() - {"mean", "std", "scale", "order", "channel_num"}: + raise ValueError + args.setdefault("mean", [0.485, 0.456, 0.406]) + args.setdefault("std", [0.229, 0.224, 0.225]) + args.setdefault("scale", 1 / 255) + args.setdefault("order", "") + args.setdefault("channel_num", 3) + if args["order"] != "": + raise ValueError + if args["channel_num"] != 3: + raise ValueError + processor = P.Normalize( + scale=args["scale"], mean=args["mean"], std=args["std"] + ) + elif tf_type == "ToCHWImage": + if args: + raise ValueError + processor = P.ToCHWImage() + else: + raise ValueError("Unknown transform type") + processors.append(processor) + return processors + + +@dataclass +class _PyOnlyMultilabelClassificationResult(object): + label_ids: List[int] + scores: List[float] + + +class _PyOnlyMultilabelClassificationPostprocessor(object): + def __init__(self, config): + super().__init__() + self._threshold = config["threshold"] + + def run(self, data): + pred = data["pred"] + + pred_index = np.where(pred >= self._threshold)[0].astype("int32") + index = pred_index[np.argsort(pred[pred_index])][::-1] + clas_id_list = [] + score_list = [] + for i in index: + 
clas_id_list.append(i.item()) + score_list.append(pred[i].item()) + + result = _PyOnlyMultilabelClassificationResult( + label_ids=clas_id_list, scores=score_list + ) + return result + + +class PyOnlyMultilabelClassificationModel(PyOnlyVisionModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + self._model_file = model_file + self._params_file = params_file + self._model_format = model_format + super().__init__(runtime_option) + self._config = load_config(config_file) + self._preprocessor = _PyOnlyMultilabelClassificationPreprocessor( + self._config["PreProcess"]["transform_ops"] + ) + self._postprocessor = _PyOnlyMultilabelClassificationPostprocessor( + self._config["PostProcess"]["MultiLabelThreshOutput"] + ) + + def model_name(): + return "PyOnlyMultilabelImageClassificationModel" + + def batch_predict(self, imgs): + data_list = [] + for img in imgs: + data = {"img": img} + data = self._preprocessor.run(data) + data_list.append(data) + + input_name = self._runtime.get_input_info(0).name + imgs = np.stack([data["img"] for data in data_list], axis=0, dtype=np.float32) + imgs = np.ascontiguousarray(imgs) + output_arrs = self._runtime.infer({input_name: imgs}) + + results = [] + for pred in output_arrs[0]: + data = {"pred": pred} + result = self._postprocessor.run(data) + results.append(result) + return results + + def _update_option(self): + self._option.set_model_path( + self._model_file, self._params_file, self._model_format + ) diff --git a/libs/ultrainfer/python/ultrainfer/vision/classification/ppshitu/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/classification/ppshitu/__init__.py new file mode 100755 index 0000000000..f5b6e8f96e --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/classification/ppshitu/__init__.py @@ -0,0 +1,145 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C +from ...common import ProcessorManager +from ...detection.ppdet import PicoDet + + +class PPShiTuV2Detector(PicoDet): + """Detect main body from an input image.""" + + ... + + +class PPShiTuV2RecognizerPreprocessor(ProcessorManager): + def __init__(self, config_file): + """Create a preprocessor for PPShiTuV2Recognizer from configuration file + + :param config_file: (str)Path of configuration file, e.g PPLCNet/inference_cls.yaml + """ + super(PPShiTuV2RecognizerPreprocessor, self).__init__() + self._manager = C.vision.classification.PPShiTuV2RecognizerPreprocessor( + config_file + ) + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+        """
+        self._manager.disable_permute()
+
+    def initial_resize_on_cpu(self, v):
+        """
+        When the initial operator is Resize and the input image is large,
+        it may be better to run the resize on CPU, because the HostToDevice
+        memcpy is time-consuming. Set this to True to run the initial resize on CPU.
+        :param: v: True or False
+        """
+        self._manager.initial_resize_on_cpu(v)
+
+
+class PPShiTuV2RecognizerPostprocessor:
+    def __init__(self, topk=1):
+        """Create a postprocessor for PPShiTuV2Recognizer (note: `topk` is currently unused)"""
+        self._postprocessor = C.vision.classification.PPShiTuV2RecognizerPostprocessor()
+
+    def run(self, runtime_results):
+        """Postprocess the runtime results for PPShiTuV2Recognizer
+
+        :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime
+        :return: list of ClassifyResult; the feature vector is ClassifyResult.feature (if the runtime_results were predicted from batched samples, the length of this list equals the batch size)
+        """
+        return self._postprocessor.run(runtime_results)
+
+
+class PPShiTuV2Recognizer(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file,
+        config_file,
+        runtime_option=None,
+        model_format=ModelFormat.PADDLE,
+    ):
+        """Load a PPShiTuV2Recognizer model exported by PaddleClas.
+
+        :param model_file: (str)Path of model file, e.g. PPLCNet/inference.pdmodel
+        :param params_file: (str)Path of parameters file, e.g. PPLCNet/inference.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param config_file: (str)Path of the deployment configuration file, e.g. PPLCNet/inference_cls.yaml
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inferring this model; if it's None, the default backend on CPU will be used
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+
+        super(PPShiTuV2Recognizer, self).__init__(runtime_option)
+        self._model = C.vision.classification.PPShiTuV2Recognizer(
+            model_file, params_file, config_file, self._runtime_option, model_format
+        )
+        assert self.initialized, "PPShiTuV2Recognizer model initialize failed."
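+
+    # Illustrative usage sketch (not part of the tested code path): the model/config
+    # paths and the cropped image are assumptions; per the docstrings above, the
+    # recognizer returns a ClassifyResult whose `feature` field holds the embedding.
+    #
+    #   import cv2
+    #   from ultrainfer.vision.classification import PPShiTuV2Recognizer
+    #
+    #   rec = PPShiTuV2Recognizer(
+    #       "PPLCNet/inference.pdmodel",
+    #       "PPLCNet/inference.pdiparams",
+    #       "PPLCNet/inference_cls.yaml",
+    #   )
+    #   crop = cv2.imread("object_crop.jpg")
+    #   feature = rec.predict(crop).feature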
+ + def clone(self): + """Clone PPShiTuV2Recognizer object + + :return: a new PPShiTuV2Recognizer object + """ + + class PPShiTuV2RecognizerCloneModel(PPShiTuV2Recognizer): + def __init__(self, model): + self._model = model + + clone_model = PPShiTuV2RecognizerCloneModel(self._model.clone()) + return clone_model + + def predict(self, im): + """Extract feature from an input image + + :param im: (numpy.ndarray) The input image data, a 3-D array with layout HWC, BGR format + :return: ClassifyResult + """ + + return self._model.predict(im) + + def batch_predict(self, images): + """Extract features from a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of ClassifyResult, the feature vector is ClassifyResult.feature + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get PPShiTuV2RecognizerPreprocessor object of the loaded model + + :return PPShiTuV2RecognizerPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PPShiTuV2RecognizerPostprocessor object of the loaded model + + :return PPShiTuV2RecognizerPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/common/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/common/__init__.py new file mode 100755 index 0000000000..e14a4b433a --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/common/__init__.py @@ -0,0 +1,18 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .manager import ProcessorManager +from .manager import PyProcessorManager +from .processors import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/common/manager.py b/libs/ultrainfer/python/ultrainfer/vision/common/manager.py new file mode 100755 index 0000000000..6adc0acac4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/common/manager.py @@ -0,0 +1,69 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from abc import ABC, abstractmethod +from ... 
import c_lib_wrap as C + + +class ProcessorManager: + def __init__(self): + self._manager = None + + def run(self, input_ims): + """Process input image + + :param: input_ims: (list of numpy.ndarray) The input images + :return: list of FDTensor + """ + return self._manager.run(input_ims) + + def use_cuda(self, enable_cv_cuda=False, gpu_id=-1): + """Use CUDA processors + + :param: enable_cv_cuda: Ture: use CV-CUDA, False: use CUDA only + :param: gpu_id: GPU device id + """ + return self._manager.use_cuda(enable_cv_cuda, gpu_id) + + +class PyProcessorManager(ABC): + """ + PyProcessorManager is used to define a customized processor in python + """ + + def __init__(self): + self._manager = C.vision.processors.ProcessorManager() + + def use_cuda(self, enable_cv_cuda=False, gpu_id=-1): + """Use CUDA processors + + :param: enable_cv_cuda: Ture: use CV-CUDA, False: use CUDA only + :param: gpu_id: GPU device id + """ + return self._manager.use_cuda(enable_cv_cuda, gpu_id) + + def __call__(self, images): + image_batch = C.vision.FDMatBatch() + image_batch.from_mats(images) + + self._manager.pre_apply(image_batch) + outputs = self.apply(image_batch) + self._manager.post_apply() + return outputs + + @abstractmethod + def apply(self, image_batch): + print("This function has to be implemented.") + return [] diff --git a/libs/ultrainfer/python/ultrainfer/vision/common/processors.py b/libs/ultrainfer/python/ultrainfer/vision/common/processors.py new file mode 100755 index 0000000000..ba90fbc2de --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/common/processors.py @@ -0,0 +1,152 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from ... import c_lib_wrap as C + + +class Processor: + def __init__(self): + self.processor = None + + def __call__(self, mat): + """call for processing input. + + :param mat: The input data FDMat or FDMatBatch. + """ + self.processor(mat) + + +class ResizeByShort(Processor): + def __init__(self, target_size: int, interp=1, use_scale=True, max_hw=[]): + """Create a ResizeByShort operation with the given parameters. + + :param target_size: The target short size to resize the image + :param interp: Optionally, the interpolation mode for resizing image + :param use_scale: Optionally, whether to scale image + :param max_hw: Max spatial size which is used by ResizeByShort + """ + self.processor = C.vision.processors.ResizeByShort( + target_size, interp, use_scale, max_hw + ) + + +class CenterCrop(Processor): + def __init__(self, width, height): + """Create a CenterCrop operation with the given parameters. + + :param width: Desired width of the cropped image + :param height: Desired height of the cropped image + """ + self.processor = C.vision.processors.CenterCrop(width, height) + + +class Pad(Processor): + def __init__(self, top: int, bottom: int, left: int, right: int, value=[]): + """Create a Pad operation with the given parameters. 
+ + :param top: The top padding + :param bottom: The bottom padding + :param left: The left padding + :param right: The right padding + :param value: the value that is used to pad on the input image + """ + self.processor = C.vision.processors.Pad(top, bottom, left, right, value) + + +class NormalizeAndPermute(Processor): + def __init__(self, mean=[], std=[], is_scale=True, min=[], max=[], swap_rb=False): + """Creae a Normalize and a Permute operation with the given parameters. + + :param mean: A list containing the mean of each channel + :param std: A list containing the standard deviation of each channel + :param is_scale: Specifies if the image are being scaled or not + :param min: A list containing the minimum value of each channel + :param max: A list containing the maximum value of each channel + """ + self.processor = C.vision.processors.NormalizeAndPermute( + mean, std, is_scale, min, max, swap_rb + ) + + +class Cast(Processor): + def __init__(self, dtype="float"): + """Creat a new cast opereaton with given dtype + + :param dtype: Target dtype of the output + """ + self.processor = C.vision.processors.Cast(dtype) + + +class HWC2CHW(Processor): + def __init__(self): + """Creat a new hwc2chw processor with default dtype. + + :return An instance of processor `HWC2CHW` + """ + self.processor = C.vision.processors.HWC2CHW() + + +class Normalize(Processor): + def __init__(self, mean, std, is_scale=True, min=[], max=[], swap_rb=False): + """Creat a new normalize opereator with given paremeters. + + :param mean: A list containing the mean of each channel + :param std: A list containing the standard deviation of each channel + :param is_scale: Specifies if the image are being scaled or not + :param min: A list containing the minimum value of each channel + :param max: A list containing the maximum value of each channel + """ + self.processor = C.vision.processors.Normalize( + mean, std, is_scale, min, max, swap_rb + ) + + +class PadToSize(Processor): + def __init__(self, width, height, value=[]): + """Create a new PadToSize opereator with given parameters. + + :param width: Desired width of the output image + :param height: Desired height of the output image + :param value: Values to pad with + """ + self.processor = C.vision.processors.PadToSize(width, height, value) + + +class Resize(Processor): + def __init__( + self, width, height, scale_w=-1.0, scale_h=-1.0, interp=1, use_scale=False + ): + """Create a Resize operation with the given parameters. + + :param width: Desired width of the output image + :param height: Desired height of the output image + :param scale_w: Scales the width in x-direction + :param scale_h: Scales the height in y-direction + :param interp: Optionally, the interpolation mode for resizing image + :param use_scale: Optionally, whether to scale image + """ + self.processor = C.vision.processors.Resize( + width, height, scale_w, scale_h, interp, use_scale + ) + + +class StridePad(Processor): + def __init__(self, stride, value=[]): + """Create a StridePad processor with given parameters. + + :param stride: Stride of the processor + :param value: Values to pad with + """ + self.processor = C.vision.processors.StridePad(stride, value) diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/detection/__init__.py new file mode 100755 index 0000000000..61a372258e --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/__init__.py @@ -0,0 +1,30 @@ +# copyright (c) 2024 PaddlePaddle Authors. 
All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .contrib.yolov7 import * +from .contrib.yolor import YOLOR +from .contrib.scaled_yolov4 import ScaledYOLOv4 +from .contrib.nanodet_plus import NanoDetPlus +from .contrib.yolox import YOLOX +from .contrib.yolov5 import * +from .contrib.yolov5seg import * +from .contrib.fastestdet import * +from .contrib.yolov5lite import YOLOv5Lite +from .contrib.yolov6 import YOLOv6 +from .contrib.yolov7end2end_trt import YOLOv7End2EndTRT +from .contrib.yolov7end2end_ort import YOLOv7End2EndORT +from .contrib.yolov8 import * +from .ppdet import * +from .contrib.rkyolo import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/fastestdet.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/fastestdet.py new file mode 100755 index 0000000000..e10276afd7 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/fastestdet.py @@ -0,0 +1,157 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class FastestDetPreprocessor: + def __init__(self): + """Create a preprocessor for FastestDet""" + self._preprocessor = C.vision.detection.FastestDetPreprocessor() + + def run(self, input_ims): + """Preprocess input images for FastestDet + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [352, 352] + """ + return self._preprocessor.size + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + +class FastestDetPostprocessor: + def __init__(self): + """Create a postprocessor for FastestDet""" + self._postprocessor = C.vision.detection.FastestDetPostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for FastestDet + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.65 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.45 + """ + return self._postprocessor.nms_threshold + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + +class FastestDet(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a FastestDet model exported by FastestDet. + + :param model_file: (str)Path of model file, e.g ./FastestDet.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(FastestDet, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "FastestDet only support model format of ModelFormat.ONNX now." + self._model = C.vision.detection.FastestDet( + model_file, params_file, self._runtime_option, model_format + ) + + assert self.initialized, "FastestDet initialize failed." 
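+
+    # Illustrative usage sketch (not part of the tested code path): the ONNX path and
+    # the test image are assumptions; thresholds are tuned through the postprocessor.
+    #
+    #   import cv2
+    #   from ultrainfer.vision.detection import FastestDet
+    #
+    #   model = FastestDet("FastestDet.onnx")
+    #   model.postprocessor.conf_threshold = 0.5
+    #   im = cv2.imread("street.jpg")
+    #   result = model.predict(im)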
+
+    def predict(self, input_image):
+        """Detect an input image
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: DetectionResult
+        """
+        assert input_image is not None, "Input image is None."
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return list of DetectionResult
+        """
+        assert len(images) == 1, "FastestDet only supports 1 image in batch_predict"
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get FastestDetPreprocessor object of the loaded model
+
+        :return FastestDetPreprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get FastestDetPostprocessor object of the loaded model
+
+        :return FastestDetPostprocessor
+        """
+        return self._model.postprocessor
diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/nanodet_plus.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/nanodet_plus.py
new file mode 100755
index 0000000000..cd82537536
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/nanodet_plus.py
@@ -0,0 +1,135 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+
+
+class NanoDetPlus(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load a NanoDetPlus model exported by NanoDet.
+
+        :param model_file: (str)Path of model file, e.g. ./nanodet.onnx
+        :param params_file: (str)Path of parameters file, e.g. yolox/model.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inferring this model; if it's None, the default backend on CPU will be used
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend options.
+        # The initialized options are stored in self._runtime_option.
+        super(NanoDetPlus, self).__init__(runtime_option)
+
+        self._model = C.vision.detection.NanoDetPlus(
+            model_file, params_file, self._runtime_option, model_format
+        )
+        # self.initialized indicates whether the whole model was initialized successfully.
+        assert self.initialized, "NanoDetPlus initialize failed."
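+
+    # Illustrative usage sketch (not part of the tested code path): the ONNX path and
+    # the test image are assumptions; thresholds are passed directly to predict here.
+    #
+    #   import cv2
+    #   from ultrainfer.vision.detection import NanoDetPlus
+    #
+    #   model = NanoDetPlus("nanodet.onnx")
+    #   im = cv2.imread("street.jpg")
+    #   result = model.predict(im, conf_threshold=0.35, nms_iou_threshold=0.5)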
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟NanoDetPlus模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [416, 416]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default (320, 320) + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def keep_ratio(self): + # keep aspect ratio or not when perform resize operation. This option is set as false by default in NanoDet-Plus + return self._model.keep_ratio + + @property + def downsample_strides(self): + # downsample strides for NanoDet-Plus to generate anchors, will take (8, 16, 32, 64) as default values + return self._model.downsample_strides + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS, default 4096 + return self._model.max_wh + + @property + def reg_max(self): + """ + reg_max for GFL regression, default 7 + """ + return self._model.reg_max + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @keep_ratio.setter + def keep_ratio(self, value): + assert isinstance( + value, bool + ), "The value to set `keep_ratio` must be type of bool." + self._model.keep_ratio = value + + @downsample_strides.setter + def downsample_strides(self, value): + assert isinstance( + value, list + ), "The value to set `downsample_strides` must be type of list." + self._model.downsample_strides = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value + + @reg_max.setter + def reg_max(self, value): + assert isinstance(value, int), "The value to set `reg_max` must be type of int." + self._model.reg_max = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/__init__.py new file mode 100755 index 0000000000..9b77f54ab8 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .rkyolov5 import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/rkyolov5.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/rkyolov5.py new file mode 100755 index 0000000000..cdf0a715b1 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/rkyolo/rkyolov5.py @@ -0,0 +1,315 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from ..... import UltraInferModel, ModelFormat +from ..... import c_lib_wrap as C + + +class RKYOLOPreprocessor: + def __init__(self): + """Create a preprocessor for RKYOLOV5""" + self._preprocessor = C.vision.detection.RKYOLOPreprocessor() + + def run(self, input_ims): + """Preprocess input images for RKYOLOV5 + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._preprocessor.padding_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." 
+ self._preprocessor.is_scale_up = value + + +class RKYOLOPostprocessor: + def __init__(self): + """Create a postprocessor for RKYOLOV5""" + self._postprocessor = C.vision.detection.RKYOLOPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for RKYOLOV5 + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + def set_anchor(self, anchor): + self._postprocessor.set_anchor(anchor) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @property + def class_num(self): + """ + class_num for postprocessing, default is 80 + """ + return self._postprocessor.class_num + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + @class_num.setter + def class_num(self, class_num): + """ + class_num for postprocessing, default is 80 + """ + assert isinstance( + class_num, int + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.class_num = class_num + + +class RKYOLOV5(UltraInferModel): + def __init__(self, model_file, runtime_option=None, model_format=ModelFormat.RKNN): + """Load a RKYOLOV5 model exported by RKYOLOV5. + + :param model_file: (str)Path of model file, e.g ./yolov5.rknn + :param params_file: (str)Path of parameters file, e.g , if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(RKYOLOV5, self).__init__(runtime_option) + + self._model = C.vision.detection.RKYOLOV5( + model_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "RKYOLOV5 initialize failed." 
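+
+    # Illustrative usage sketch (not part of the tested code path): the RKNN path and
+    # the test image are assumptions; any board-specific RuntimeOption setup is omitted.
+    #
+    #   import cv2
+    #   from ultrainfer.vision.detection import RKYOLOV5
+    #
+    #   model = RKYOLOV5("yolov5.rknn")
+    #   model.postprocessor.class_num = 80
+    #   im = cv2.imread("street.jpg")
+    #   result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)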
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: DetectionResult + """ + + self.postprocessor.conf_threshold = conf_threshold + self.postprocessor.nms_threshold = nms_iou_threshold + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get RKYOLOV5Preprocessor object of the loaded model + + :return RKYOLOV5Preprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get RKYOLOV5Postprocessor object of the loaded model + + :return RKYOLOV5Postprocessor + """ + return self._model.postprocessor + + +class RKYOLOX(UltraInferModel): + def __init__(self, model_file, runtime_option=None, model_format=ModelFormat.RKNN): + """Load a RKYOLOX model exported by RKYOLOX. + + :param model_file: (str)Path of model file, e.g ./yolox.rknn + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(RKYOLOX, self).__init__(runtime_option) + + self._model = C.vision.detection.RKYOLOX( + model_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "RKYOLOV5 initialize failed." + + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: DetectionResult + """ + + self.postprocessor.conf_threshold = conf_threshold + self.postprocessor.nms_threshold = nms_iou_threshold + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get RKYOLOV5Preprocessor object of the loaded model + + :return RKYOLOV5Preprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get RKYOLOV5Postprocessor object of the loaded model + + :return RKYOLOV5Postprocessor + """ + return self._model.postprocessor + + +class RKYOLOV7(UltraInferModel): + def __init__(self, model_file, runtime_option=None, model_format=ModelFormat.RKNN): + """Load a RKYOLOX model exported by RKYOLOV7. 
+
+        :param model_file: (str)Path of model file, e.g. ./yolov7.rknn
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inferring this model; if it's None, the default backend on CPU will be used
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend options.
+        # The initialized options are stored in self._runtime_option.
+        super(RKYOLOV7, self).__init__(runtime_option)
+
+        self._model = C.vision.detection.RKYOLOV7(
+            model_file, self._runtime_option, model_format
+        )
+        # self.initialized indicates whether the whole model was initialized successfully.
+        assert self.initialized, "RKYOLOV7 initialize failed."
+
+    def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5):
+        """Detect an input image
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :param conf_threshold: confidence threshold for postprocessing, default is 0.25
+        :param nms_iou_threshold: iou threshold for NMS, default is 0.5
+        :return: DetectionResult
+        """
+
+        self.postprocessor.conf_threshold = conf_threshold
+        self.postprocessor.nms_threshold = nms_iou_threshold
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return list of DetectionResult
+        """
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get the RKYOLOPreprocessor object of the loaded model
+
+        :return RKYOLOPreprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get the RKYOLOPostprocessor object of the loaded model
+
+        :return RKYOLOPostprocessor
+        """
+        return self._model.postprocessor
diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/scaled_yolov4.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/scaled_yolov4.py
new file mode 100755
index 0000000000..84dad54fa5
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/scaled_yolov4.py
@@ -0,0 +1,146 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+
+
+class ScaledYOLOv4(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load a ScaledYOLOv4 model exported by ScaledYOLOv4.
+ + :param model_file: (str)Path of model file, e.g ./scaled_yolov4.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(ScaledYOLOv4, self).__init__(runtime_option) + + self._model = C.vision.detection.ScaledYOLOv4( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "ScaledYOLOv4 initialize failed." + + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟ScaledYOLOv4模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS + return self._model.max_wh + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." 
+ self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolor.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolor.py new file mode 100755 index 0000000000..1c4cfae6d5 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolor.py @@ -0,0 +1,145 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOR(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOR model exported by YOLOR + + :param model_file: (str)Path of model file, e.g ./yolor.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOR, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOR( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOR initialize failed." 
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟YOLOR模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS + return self._model.max_wh + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5.py new file mode 100755 index 0000000000..1ec1da0d80 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5.py @@ -0,0 +1,227 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOv5Preprocessor: + def __init__(self): + """Create a preprocessor for YOLOv5""" + self._preprocessor = C.vision.detection.YOLOv5Preprocessor() + + def run(self, input_ims): + """Preprocess input images for YOLOv5 + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @property + def is_mini_pad(self): + """ + is_mini_pad for preprocessing, pad to the minimum rectange which height and width is times of stride, default false + """ + return self._preprocessor.is_mini_pad + + @property + def stride(self): + """ + stride for preprocessing, only for mini_pad mode, default 32 + """ + return self._preprocessor.stride + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._preprocessor.padding_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._preprocessor.is_mini_pad = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." 
+ self._preprocessor.stride = value + + +class YOLOv5Postprocessor: + def __init__(self): + """Create a postprocessor for YOLOv5""" + self._postprocessor = C.vision.detection.YOLOv5Postprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv5 + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @property + def multi_label(self): + """ + multi_label for postprocessing, set true for eval, default is True + """ + return self._postprocessor.multi_label + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + @multi_label.setter + def multi_label(self, value): + assert isinstance( + value, bool + ), "The value to set `multi_label` must be type of bool." + self._postprocessor.multi_label = value + + +class YOLOv5(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv5 model exported by YOLOv5. + + :param model_file: (str)Path of model file, e.g ./yolov5.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOv5, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv5( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOv5 initialize failed." 
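+
+    # Usage sketch (illustrative only, not part of the tested code in this patch):
+    # assumes a locally exported "yolov5s.onnx" file, an image readable by OpenCV,
+    # and that YOLOv5 is re-exported from ultrainfer.vision.detection as the
+    # package layout suggests.
+    #
+    #   import cv2
+    #   import ultrainfer as ui
+    #
+    #   model = ui.vision.detection.YOLOv5("yolov5s.onnx")
+    #   im = cv2.imread("test.jpg")
+    #   result = model.predict(im, conf_threshold=0.3, nms_iou_threshold=0.5)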
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: DetectionResult + """ + + self.postprocessor.conf_threshold = conf_threshold + self.postprocessor.nms_threshold = nms_iou_threshold + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get YOLOv5Preprocessor object of the loaded model + + :return YOLOv5Preprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get YOLOv5Postprocessor object of the loaded model + + :return YOLOv5Postprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5lite.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5lite.py new file mode 100755 index 0000000000..2cd7d7d878 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5lite.py @@ -0,0 +1,191 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOv5Lite(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv5Lite model exported by YOLOv5Lite. + + :param model_file: (str)Path of model file, e.g ./yolov5lite.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOv5Lite, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv5Lite( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOv5Lite initialize failed." 
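+
+    # Configuration sketch (illustrative; the file name is hypothetical): the
+    # official YOLOv5Lite export script strips the decode module, so
+    # is_decode_exported (see the property below) defaults to False; set it to
+    # True only if the ONNX file was exported with the decode module included.
+    #
+    #   model = YOLOv5Lite("v5lite-s.onnx")
+    #   model.is_decode_exported = True   # only if the export kept the decode module
+    #   result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)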
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟YOLOv5Lite模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS + return self._model.max_wh + + @property + def is_decode_exported(self): + """ + whether the model_file was exported with decode module. + The official YOLOv5Lite/export.py script will export ONNX file without decode module. + Please set it 'true' manually if the model file was exported with decode module. + False : ONNX files without decode module. True : ONNX file with decode module. + default False + """ + return self._model.is_decode_exported + + @property + def anchor_config(self): + return self._model.anchor_config + + @property + def downsample_strides(self): + """ + downsample strides for YOLOv5Lite to generate anchors, will take (8,16,32) as default values, might have stride=64. + """ + return self._model.downsample_strides + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." 
+ self._model.stride = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value + + @is_decode_exported.setter + def is_decode_exported(self, value): + assert isinstance( + value, bool + ), "The value to set `is_decode_exported` must be type of bool." + self._model.is_decode_exported = value + + @anchor_config.setter + def anchor_config(self, anchor_config_val): + assert isinstance( + anchor_config_val, list + ), "The value to set `anchor_config` must be type of tuple or list." + assert isinstance( + anchor_config_val[0], list + ), "The value to set `anchor_config` must be 2-dimensions tuple or list" + self._model.anchor_config = anchor_config_val + + @downsample_strides.setter + def downsample_strides(self, value): + assert isinstance( + value, list + ), "The value to set `downsample_strides` must be type of list." + self._model.downsample_strides = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5seg.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5seg.py new file mode 100755 index 0000000000..0c304bd1d6 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov5seg.py @@ -0,0 +1,222 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class YOLOv5SegPreprocessor: + def __init__(self): + """Create a preprocessor for YOLOv5Seg""" + self._preprocessor = C.vision.detection.YOLOv5SegPreprocessor() + + def run(self, input_ims): + """Preprocess input images for YOLOv5Seg + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @property + def is_mini_pad(self): + """ + is_mini_pad for preprocessing, pad to the minimum rectange which height and width is times of stride, default false + """ + return self._preprocessor.is_mini_pad + + @property + def stride(self): + """ + stride for preprocessing, only for mini_pad mode, default 32 + """ + return self._preprocessor.stride + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._preprocessor.padding_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._preprocessor.is_mini_pad = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." 
+ self._preprocessor.stride = value + + +class YOLOv5SegPostprocessor: + def __init__(self): + """Create a postprocessor for YOLOv5Seg""" + self._postprocessor = C.vision.detection.YOLOv5SegPostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv5Seg + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @property + def multi_label(self): + """ + multi_label for postprocessing, set true for eval, default is True + """ + return self._postprocessor.multi_label + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + @multi_label.setter + def multi_label(self, value): + assert isinstance( + value, bool + ), "The value to set `multi_label` must be type of bool." + self._postprocessor.multi_label = value + + +class YOLOv5Seg(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv5Seg model exported by YOLOv5. + + :param model_file: (str)Path of model file, e.g ./yolov5s-seg.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(YOLOv5Seg, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv5Seg( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "YOLOv5Seg initialize failed." 
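+
+    # Configuration sketch (illustrative; the file name is hypothetical):
+    # YOLOv5Seg.predict() takes no threshold arguments, so thresholds and the
+    # input size are set on the preprocessor/postprocessor exposed by this class.
+    #
+    #   model = YOLOv5Seg("yolov5s-seg.onnx")
+    #   model.preprocessor.size = [640, 640]        # [width, height]
+    #   model.postprocessor.conf_threshold = 0.3    # confidence filter
+    #   model.postprocessor.nms_threshold = 0.5     # NMS IoU threshold
+    #   result = model.predict(im)                  # im: HWC BGR numpy.ndarray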
+
+    def predict(self, input_image):
+        """Detect an input image
+
+        The confidence and NMS IoU thresholds are configured through the
+        `postprocessor` property rather than passed to this method.
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: DetectionResult
+        """
+
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return: list of DetectionResult
+        """
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get YOLOv5SegPreprocessor object of the loaded model
+
+        :return: YOLOv5SegPreprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get YOLOv5SegPostprocessor object of the loaded model
+
+        :return: YOLOv5SegPostprocessor
+        """
+        return self._model.postprocessor
diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov6.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov6.py
new file mode 100755
index 0000000000..aed7e7f197
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov6.py
@@ -0,0 +1,145 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+
+
+class YOLOv6(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load a YOLOv6 model exported by YOLOv6.
+
+        :param model_file: (str)Path of model file, e.g. ./yolov6.onnx
+        :param params_file: (str)Path of parameters file, e.g. yolov6/model.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend options;
+        # the initialized options are stored in self._runtime_option.
+        super(YOLOv6, self).__init__(runtime_option)
+
+        self._model = C.vision.detection.YOLOv6(
+            model_file, params_file, self._runtime_option, model_format
+        )
+        # self.initialized indicates whether the whole model initialized successfully.
+        assert self.initialized, "YOLOv6 initialize failed."
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟YOLOv6模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS + return self._model.max_wh + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7.py new file mode 100755 index 0000000000..9b9c63a8ae --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7.py @@ -0,0 +1,187 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOv7Preprocessor: + def __init__(self): + """Create a preprocessor for YOLOv7""" + self._preprocessor = C.vision.detection.YOLOv7Preprocessor() + + def run(self, input_ims): + """Preprocess input images for YOLOv7 + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._preprocessor.padding_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value + + +class YOLOv7Postprocessor: + def __init__(self): + """Create a postprocessor for YOLOv7""" + self._postprocessor = C.vision.detection.YOLOv7Postprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv7 + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." 
+ self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + +class YOLOv7(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv7 model exported by YOLOv7. + + :param model_file: (str)Path of model file, e.g ./yolov7.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOv7, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv7( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOv7 initialize failed." + + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: DetectionResult + """ + + self.postprocessor.conf_threshold = conf_threshold + self.postprocessor.nms_threshold = nms_iou_threshold + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get YOLOv7Preprocessor object of the loaded model + + :return YOLOv7Preprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get YOLOv7Postprocessor object of the loaded model + + :return YOLOv7Postprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_ort.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_ort.py new file mode 100755 index 0000000000..708a1cd36c --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_ort.py @@ -0,0 +1,132 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class YOLOv7End2EndORT(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv7End2EndORT model exported by YOLOv7. + + :param model_file: (str)Path of model file, e.g ./yolov7end2end_ort.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOv7End2EndORT, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv7End2EndORT( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOv7End2End initialize failed." + + def predict(self, input_image, conf_threshold=0.25): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold) + + # 一些跟模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." 
+ self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_trt.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_trt.py new file mode 100755 index 0000000000..9b91f70f1f --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov7end2end_trt.py @@ -0,0 +1,132 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOv7End2EndTRT(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv7End2EndTRT model exported by YOLOv7. + + :param model_file: (str)Path of model file, e.g ./yolov7end2end_trt.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(YOLOv7End2EndTRT, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv7End2EndTRT( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "YOLOv7End2EndTRT initialize failed." 
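+
+    # Deployment sketch (illustrative; assumes RuntimeOption exposes use_gpu() and
+    # use_trt_backend() in this package's runtime module — treat the exact method
+    # names as an assumption, not a guarantee):
+    #
+    #   from ultrainfer import RuntimeOption
+    #
+    #   option = RuntimeOption()
+    #   option.use_gpu(0)           # this end-to-end export targets GPU execution
+    #   option.use_trt_backend()    # the embedded NMS plugin requires the TensorRT backend
+    #   model = YOLOv7End2EndTRT("yolov7-end2end-trt.onnx", runtime_option=option)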
+ + def predict(self, input_image, conf_threshold=0.25): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold) + + # 一些跟模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov8.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov8.py new file mode 100755 index 0000000000..e1ceb82fd4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolov8.py @@ -0,0 +1,222 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class YOLOv8Preprocessor: + def __init__(self): + """Create a preprocessor for YOLOv8""" + self._preprocessor = C.vision.detection.YOLOv8Preprocessor() + + def run(self, input_ims): + """Preprocess input images for YOLOv8 + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @property + def is_mini_pad(self): + """ + is_mini_pad for preprocessing, pad to the minimum rectange which height and width is times of stride, default false + """ + return self._preprocessor.is_mini_pad + + @property + def stride(self): + """ + stride for preprocessing, only for mini_pad mode, default 32 + """ + return self._preprocessor.stride + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._preprocessor.padding_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._preprocessor.is_mini_pad = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." 
+ self._preprocessor.stride = value + + +class YOLOv8Postprocessor: + def __init__(self): + """Create a postprocessor for YOLOv8""" + self._postprocessor = C.vision.detection.YOLOv8Postprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv8 + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @property + def multi_label(self): + """ + multi_label for postprocessing, set true for eval, default is True + """ + return self._postprocessor.multi_label + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + @multi_label.setter + def multi_label(self, value): + assert isinstance( + value, bool + ), "The value to set `multi_label` must be type of bool." + self._postprocessor.multi_label = value + + +class YOLOv8(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv8 model exported by YOLOv8. + + :param model_file: (str)Path of model file, e.g ./yolov8s.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(YOLOv8, self).__init__(runtime_option) + + self._model = C.vision.detection.YOLOv8( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "YOLOv8 initialize failed." 
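+
+    # Batch-inference sketch (illustrative): YOLOv8.predict() takes no threshold
+    # arguments, so thresholds are configured on the postprocessor before calling
+    # predict() or batch_predict().
+    #
+    #   model.postprocessor.conf_threshold = 0.25
+    #   model.postprocessor.nms_threshold = 0.5
+    #   results = model.batch_predict([im1, im2])   # list of HWC BGR numpy arrays
+    #   # results is a list of DetectionResult, one per input image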
+
+    def predict(self, input_image):
+        """Detect an input image
+
+        The confidence and NMS IoU thresholds are configured through the
+        `postprocessor` property rather than passed to this method.
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: DetectionResult
+        """
+
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return: list of DetectionResult
+        """
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get YOLOv8Preprocessor object of the loaded model
+
+        :return: YOLOv8Preprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get YOLOv8Postprocessor object of the loaded model
+
+        :return: YOLOv8Postprocessor
+        """
+        return self._model.postprocessor
diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolox.py b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolox.py
new file mode 100755
index 0000000000..09ac32981b
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/detection/contrib/yolox.py
@@ -0,0 +1,130 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+
+
+class YOLOX(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load a YOLOX model exported by YOLOX.
+
+        :param model_file: (str)Path of model file, e.g. ./yolox.onnx
+        :param params_file: (str)Path of parameters file, e.g. yolox/model.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend options;
+        # the initialized options are stored in self._runtime_option.
+        super(YOLOX, self).__init__(runtime_option)
+
+        self._model = C.vision.detection.YOLOX(
+            model_file, params_file, self._runtime_option, model_format
+        )
+        # self.initialized indicates whether the whole model initialized successfully.
+        assert self.initialized, "YOLOX initialize failed."
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: DetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟YOLOX模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_decode_exported(self): + """ + whether the model_file was exported with decode module. + The official YOLOX/tools/export_onnx.py script will export ONNX file without decode module. + Please set it 'true' manually if the model file was exported with decode module. + Defalut False. + """ + return self._model.is_decode_exported + + @property + def downsample_strides(self): + """ + downsample strides for YOLOX to generate anchors, will take (8,16,32) as default values, might have stride=64. + """ + return self._model.downsample_strides + + @property + def max_wh(self): + # for offseting the boxes by classes when using NMS + return self._model.max_wh + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_decode_exported.setter + def is_decode_exported(self, value): + assert isinstance( + value, bool + ), "The value to set `is_decode_exported` must be type of bool." + self._model.is_decode_exported = value + + @downsample_strides.setter + def downsample_strides(self, value): + assert isinstance( + value, list + ), "The value to set `downsample_strides` must be type of list." + self._model.downsample_strides = value + + @max_wh.setter + def max_wh(self, value): + assert isinstance( + value, float + ), "The value to set `max_wh` must be type of float." + self._model.max_wh = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/detection/ppdet/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/detection/ppdet/__init__.py new file mode 100755 index 0000000000..5e33bee662 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/detection/ppdet/__init__.py @@ -0,0 +1,990 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from typing import Union, List
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+from ...common import ProcessorManager
+
+
+class PaddleDetPreprocessor(ProcessorManager):
+    def __init__(self, config_file):
+        """Create a preprocessor for PaddleDetection Model from configuration file
+
+        :param config_file: (str)Path of configuration file, e.g. ppyoloe/infer_cfg.yml
+        """
+        self._manager = C.vision.detection.PaddleDetPreprocessor(config_file)
+
+    def disable_normalize(self):
+        """
+        This function will disable normalize in preprocessing step.
+        """
+        self._manager.disable_normalize()
+
+    def disable_permute(self):
+        """
+        This function will disable hwc2chw in preprocessing step.
+        """
+        self._manager.disable_permute()
+
+
+class NMSOption:
+    def __init__(self):
+        self.nms_option = C.vision.detection.NMSOption()
+
+    @property
+    def background_label(self):
+        return self.nms_option.background_label
+
+
+class NMSRotatedOption:
+    def __init__(self):
+        self.nms_rotated_option = C.vision.detection.NMSRotatedOption()
+
+    @property
+    def background_label(self):
+        return self.nms_rotated_option.background_label
+
+
+class PaddleDetPostprocessor:
+    def __init__(self):
+        """Create a postprocessor for PaddleDetection Model"""
+        self._postprocessor = C.vision.detection.PaddleDetPostprocessor()
+
+    def run(self, runtime_results):
+        """Postprocess the runtime results for PaddleDetection Model
+
+        :param runtime_results: (list of FDTensor)The output FDTensor results from runtime
+        :return: list of DetectionResult (if the runtime_results were predicted from batched samples, the length of this list equals the batch size)
+        """
+        return self._postprocessor.run(runtime_results)
+
+    def apply_nms(self):
+        self._postprocessor.apply_nms()
+
+    def set_nms_option(self, nms_option=None):
+        """This function will enable decode and nms in postprocess step."""
+        if nms_option is None:
+            nms_option = NMSOption()
+        self._postprocessor.set_nms_option(nms_option.nms_option)
+
+    def set_nms_rotated_option(self, nms_rotated_option=None):
+        """This function will enable decode and rotated nms in postprocess step."""
+        if nms_rotated_option is None:
+            nms_rotated_option = NMSRotatedOption()
+        self._postprocessor.set_nms_rotated_option(nms_rotated_option.nms_rotated_option)
+
+
+class PPYOLOE(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file,
+        config_file,
+        runtime_option=None,
+        model_format=ModelFormat.PADDLE,
+    ):
+        """Load a PPYOLOE model exported by PaddleDetection.
+
+        :param model_file: (str)Path of model file, e.g. ppyoloe/model.pdmodel
+        :param params_file: (str)Path of parameters file, e.g. ppyoloe/model.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param config_file: (str)Path of configuration file for deployment, e.g. ppyoloe/infer_cfg.yml
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference of this model; if it's None, the default backend on CPU will be used
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        super(PPYOLOE, self).__init__(runtime_option)
+
+        self._model = C.vision.detection.PPYOLOE(
+            model_file, params_file, config_file, self._runtime_option, model_format
+        )
+        assert self.initialized, "PPYOLOE model initialize failed."
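+    # --- Illustrative usage sketch (hedged): the file paths below are hypothetical and
+    # the top-level import path `ultrainfer.vision.detection.PPYOLOE` is an assumption;
+    # this block only illustrates how an exported PaddleDetection model is typically
+    # loaded and run with this wrapper.
+    #
+    #   import cv2
+    #   import ultrainfer as ui
+    #
+    #   model = ui.vision.detection.PPYOLOE(
+    #       "ppyoloe/model.pdmodel",
+    #       "ppyoloe/model.pdiparams",
+    #       "ppyoloe/infer_cfg.yml",
+    #   )
+    #   result = model.predict(cv2.imread("test.jpg"))
+    #
+    # If the exported model does not bundle NMS, postprocessing NMS can be enabled via
+    # model.postprocessor.apply_nms() or set_nms_option() (see PaddleDetPostprocessor above).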
+ + def predict(self, im): + """Detect an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: DetectionResult + """ + + assert im is not None, "The input image data is None." + return self._model.predict(im) + + def batch_predict(self, images): + """Detect a batch of input image list + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + def clone(self): + """Clone PPYOLOE object + + :return: a new PPYOLOE object + """ + + class PPYOLOEClone(PPYOLOE): + def __init__(self, model): + self._model = model + + clone_model = PPYOLOEClone(self._model.clone()) + return clone_model + + @property + def preprocessor(self): + """Get PaddleDetPreprocessor object of the loaded model + + :return PaddleDetPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PaddleDetPostprocessor object of the loaded model + + :return PaddleDetPostprocessor + """ + return self._model.postprocessor + + +class PPYOLO(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PPYOLO model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g ppyolo/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ppyolo/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPYOLO model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PPYOLO( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PPYOLO model initialize failed." + + def clone(self): + """Clone PPYOLO object + + :return: a new PPYOLO object + """ + + class PPYOLOClone(PPYOLO): + def __init__(self, model): + self._model = model + + clone_model = PPYOLOClone(self._model.clone()) + return clone_model + + +class PaddleYOLOX(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOX model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g yolox/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PaddleYOLOX model only support model format of ModelFormat.Paddle now." 
+ self._model = C.vision.detection.PaddleYOLOX( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleYOLOX model initialize failed." + + def clone(self): + """Clone PaddleYOLOX object + + :return: a new PaddleYOLOX object + """ + + class PaddleYOLOXClone(PaddleYOLOX): + def __init__(self, model): + self._model = model + + clone_model = PaddleYOLOXClone(self._model.clone()) + return clone_model + + +class PicoDet(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PicoDet model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g picodet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g picodet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + self._model = C.vision.detection.PicoDet( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PicoDet model initialize failed." + + def clone(self): + """Clone PicoDet object + + :return: a new PicoDet object + """ + + class PicoDetClone(PicoDet): + def __init__(self, model): + self._model = model + + clone_model = PicoDetClone(self._model.clone()) + return clone_model + + +class FasterRCNN(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a FasterRCNN model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g fasterrcnn/model.pdmodel + :param params_file: (str)Path of parameters file, e.g fasterrcnn/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "FasterRCNN model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.FasterRCNN( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "FasterRCNN model initialize failed." + + def clone(self): + """Clone FasterRCNN object + + :return: a new FasterRCNN object + """ + + class FasterRCNNClone(FasterRCNN): + def __init__(self, model): + self._model = model + + clone_model = FasterRCNNClone(self._model.clone()) + return clone_model + + +class YOLOv3(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOv3 model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g yolov3/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolov3/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "YOLOv3 model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.YOLOv3( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "YOLOv3 model initialize failed." + + def clone(self): + """Clone YOLOv3 object + + :return: a new YOLOv3 object + """ + + class YOLOv3Clone(YOLOv3): + def __init__(self, model): + self._model = model + + clone_model = YOLOv3Clone(self._model.clone()) + return clone_model + + +class SOLOv2(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a SOLOv2 model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g solov2/model.pdmodel + :param params_file: (str)Path of parameters file, e.g solov2/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g solov2/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "SOLOv2 model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.SOLOv2( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "SOLOv2 model initialize failed." + + def clone(self): + """Clone SOLOv2 object + + :return: a new SOLOv2 object + """ + + class SOLOv2Clone(SOLOv2): + def __init__(self, model): + self._model = model + + clone_model = SOLOv2Clone(self._model.clone()) + return clone_model + + +class MaskRCNN(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a MaskRCNN model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g fasterrcnn/model.pdmodel + :param params_file: (str)Path of parameters file, e.g fasterrcnn/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "MaskRCNN model only support model format of ModelFormat.Paddle now." 
+ self._model = C.vision.detection.MaskRCNN( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "MaskRCNN model initialize failed." + + def batch_predict(self, images): + """Detect a batch of input image list, batch_predict is not supported for maskrcnn now. + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + raise Exception("batch_predict is not supported for MaskRCNN model now.") + + def clone(self): + """Clone MaskRCNN object + + :return: a new MaskRCNN object + """ + + class MaskRCNNClone(MaskRCNN): + def __init__(self, model): + self._model = model + + clone_model = MaskRCNNClone(self._model.clone()) + return clone_model + + +class SSD(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a SSD model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g ssd/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ssd/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "SSD model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.SSD( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "SSD model initialize failed." + + def clone(self): + """Clone SSD object + + :return: a new SSD object + """ + + class SSDClone(SSD): + def __init__(self, model): + self._model = model + + clone_model = SSDClone(self._model.clone()) + return clone_model + + +class PaddleYOLOv5(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOv5 model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g yolov5/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolov5/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PaddleYOLOv5 model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PaddleYOLOv5( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleYOLOv5 model initialize failed." + + +class PaddleYOLOv6(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOv6 model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g yolov6/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolov6/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PaddleYOLOv6 model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PaddleYOLOv6( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleYOLOv6 model initialize failed." + + +class PaddleYOLOv7(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOv7 model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g yolov7/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolov7/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PaddleYOLOv7 model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PaddleYOLOv7( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleYOLOv7 model initialize failed." + + +class PaddleYOLOv8(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a YOLOv8 model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g yolov8/model.pdmodel + :param params_file: (str)Path of parameters file, e.g yolov8/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g yolov8/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + self._model = C.vision.detection.PaddleYOLOv8( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleYOLOv8 model initialize failed." + + +class RTMDet(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a RTMDet model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g rtmdet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g rtmdet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "RTMDet model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.RTMDet( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "RTMDet model initialize failed." + + +class CascadeRCNN(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a CascadeRCNN model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g cascadercnn/model.pdmodel + :param params_file: (str)Path of parameters file, e.g cascadercnn/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "CascadeRCNN model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.CascadeRCNN( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "CascadeRCNN model initialize failed." + + +class PSSDet(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PSSDet model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g pssdet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g pssdet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PSSDet model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PSSDet( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PSSDet model initialize failed." + + +class RetinaNet(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a RetinaNet model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g retinanet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g retinanet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "RetinaNet model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.RetinaNet( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "RetinaNet model initialize failed." + + +class PPYOLOESOD(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PPYOLOESOD model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g ppyoloesod/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ppyoloesod/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPYOLOESOD model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PPYOLOESOD( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PPYOLOESOD model initialize failed." + + +class FCOS(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a FCOS model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g fcos/model.pdmodel + :param params_file: (str)Path of parameters file, e.g fcos/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "FCOS model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.FCOS( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "FCOS model initialize failed." + + +class TTFNet(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a TTFNet model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g ttfnet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ttfnet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "TTFNet model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.TTFNet( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "TTFNet model initialize failed." + + +class TOOD(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a TOOD model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g tood/model.pdmodel + :param params_file: (str)Path of parameters file, e.g tood/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "TOOD model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.TOOD( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "TOOD model initialize failed." + + +class GFL(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a GFL model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g gfl/model.pdmodel + :param params_file: (str)Path of parameters file, e.g gfl/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "GFL model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.GFL( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "GFL model initialize failed." + + +class PaddleDetectionModel(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PaddleDetectionModel model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g ppyoloe/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ppyoloe/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PaddleDetectionModel, self).__init__(runtime_option) + + self._model = C.vision.detection.PaddleDetectionModel( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleDetectionModel model initialize failed." + + def predict(self, im): + """Detect an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: DetectionResult + """ + + assert im is not None, "The input image data is None." + return self._model.predict(im) + + def batch_predict(self, images): + """Detect a batch of input image list + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + def clone(self): + """Clone PPYOLOE object + + :return: a new PPYOLOE object + """ + + class PPYOLOEClone(PPYOLOE): + def __init__(self, model): + self._model = model + + clone_model = PPYOLOEClone(self._model.clone()) + return clone_model + + @property + def preprocessor(self): + """Get PaddleDetPreprocessor object of the loaded model + + :return PaddleDetPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PaddleDetPostprocessor object of the loaded model + + :return PaddleDetPostprocessor + """ + return self._model.postprocessor + + +class PPYOLOER(PPYOLOE): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PPYOLOER model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g ppyoloe_r/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ppyoloe_r/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe_r/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + self._model = C.vision.detection.PPYOLOER( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PicoDet model initialize failed." 
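+    # --- Illustrative batch-inference sketch (hedged): the image file names below are
+    # hypothetical; this block only illustrates the batch_predict() and clone() methods
+    # defined for this family of wrappers, it is not part of the API added here.
+    #
+    #   import cv2
+    #   frames = [cv2.imread(p) for p in ("a.jpg", "b.jpg", "c.jpg")]
+    #   results = model.batch_predict(frames)  # one DetectionResult per image
+    #   for r in results:
+    #       # boxes, scores and label_ids describe the detections for one image
+    #       print(len(r.boxes), len(r.scores), len(r.label_ids))
+    #
+    # clone() returns a new wrapper around self._model.clone(); a common use (assumption)
+    # is giving each worker thread its own handle to the same loaded model.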
+ + def clone(self): + """Clone PPYOLOER object + + :return: a new PPYOLOER object + """ + + class PPYOLOERClone(PPYOLOER): + def __init__(self, model): + self._model = model + + clone_model = PPYOLOERClone(self._model.clone()) + return clone_model diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/__init__.py new file mode 100755 index 0000000000..3b2bbf6518 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/__init__.py @@ -0,0 +1,17 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from .classify import eval_classify +from .detection import eval_detection +from .segmentation import eval_segmentation diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/classify.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/classify.py new file mode 100755 index 0000000000..f39cdf5e59 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/classify.py @@ -0,0 +1,79 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
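+
+# --- Illustrative usage sketch (hedged): the dataset directory and label-file name
+# below are hypothetical; this block only shows how eval_classify defined in this
+# module is typically called.
+#
+#   from ultrainfer.vision.evaluation import eval_classify
+#
+#   # label file: one "<image_name> <label_id>" pair per line, as parsed below
+#   scores = eval_classify(model, "imagenet/val/", "imagenet/val_list.txt", topk=5)
+#   print(scores)  # OrderedDict with "topk5" and "topk5_average_inference_time(s)"
+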
+import numpy as np +import os +import re +import time +import collections + + +def topk_accuracy(topk_list, label_list): + match_array = np.logical_or.reduce(topk_list == label_list, axis=1) + topk_acc_score = match_array.sum() / match_array.shape[0] + return topk_acc_score + + +def eval_classify(model, image_file_path, label_file_path, topk=5): + from tqdm import trange + import cv2 + import math + + result_list = [] + label_list = [] + image_label_dict = {} + assert os.path.isdir( + image_file_path + ), "The image_file_path:{} is not a directory.".format(image_file_path) + assert os.path.isfile( + label_file_path + ), "The label_file_path:{} is not a file.".format(label_file_path) + assert isinstance(topk, int), "The tok:{} is not int type".format(topk) + + with open(label_file_path, "r") as file: + lines = file.readlines() + for line in lines: + items = line.strip().split() + image_name = items[0] + label = items[1] + image_label_dict[image_name] = int(label) + images_num = len(image_label_dict) + twenty_percent_images_num = math.ceil(images_num * 0.2) + start_time = 0 + end_time = 0 + average_inference_time = 0 + scores = collections.OrderedDict() + for (image, label), i in zip( + image_label_dict.items(), trange(images_num, desc="Inference Progress") + ): + if i == twenty_percent_images_num: + start_time = time.time() + + label_list.append([label]) + image_path = os.path.join(image_file_path, image) + im = cv2.imread(image_path) + result = model.predict(im, topk) + result_list.append(result.label_ids) + if i == images_num - 1: + end_time = time.time() + average_inference_time = round( + (end_time - start_time) / (images_num - twenty_percent_images_num), 4 + ) + topk_acc_score = topk_accuracy(np.array(result_list), np.array(label_list)) + if topk == 1: + scores.update({"topk1": topk_acc_score}) + scores.update({"topk1_average_inference_time(s)": average_inference_time}) + elif topk == 5: + scores.update({"topk5": topk_acc_score}) + scores.update({"topk5_average_inference_time(s)": average_inference_time}) + return scores diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/detection.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/detection.py new file mode 100755 index 0000000000..00f228762b --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/detection.py @@ -0,0 +1,125 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
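+
+# --- Illustrative usage sketch (hedged): the dataset paths below are hypothetical;
+# this block only shows how eval_detection defined in this module is typically called.
+#
+#   from ultrainfer.vision.evaluation import eval_detection
+#
+#   # Evaluates a detector against a COCO-format annotation file. conf_threshold and
+#   # nms_iou_threshold must be given together or both left as None (see the asserts below).
+#   metrics = eval_detection(
+#       model,
+#       data_dir="coco/val2017",
+#       ann_file="coco/annotations/instances_val2017.json",
+#       conf_threshold=0.01,
+#       nms_iou_threshold=0.65,
+#   )
+#   print(metrics)  # includes "bbox_mmap" and "average_inference_time(s)"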
+ +import numpy as np +import copy +import collections +import math + + +def eval_detection( + model, + data_dir, + ann_file, + conf_threshold=None, + nms_iou_threshold=None, + plot=False, + batch_size=1, +): + from .utils import CocoDetection + from .utils import COCOMetric + import cv2 + from tqdm import trange + import time + + if conf_threshold is not None or nms_iou_threshold is not None: + assert ( + conf_threshold is not None and nms_iou_threshold is not None + ), "The conf_threshold and nms_iou_threshold should be setted at the same time" + assert isinstance( + conf_threshold, (float, int) + ), "The conf_threshold:{} need to be int or float".format(conf_threshold) + assert isinstance( + nms_iou_threshold, (float, int) + ), "The nms_iou_threshold:{} need to be int or float".format(nms_iou_threshold) + eval_dataset = CocoDetection(data_dir=data_dir, ann_file=ann_file, shuffle=False) + all_image_info = eval_dataset.file_list + image_num = eval_dataset.num_samples + eval_dataset.data_fields = { + "im_id", + "image_shape", + "image", + "gt_bbox", + "gt_class", + "is_crowd", + } + eval_metric = COCOMetric( + coco_gt=copy.deepcopy(eval_dataset.coco_gt), classwise=False + ) + scores = collections.OrderedDict() + twenty_percent_image_num = math.ceil(image_num * 0.2) + start_time = 0 + end_time = 0 + average_inference_time = 0 + im_list = list() + im_id_list = list() + for image_info, i in zip( + all_image_info, trange(image_num, desc="Inference Progress") + ): + if i == twenty_percent_image_num: + start_time = time.time() + im = cv2.imread(image_info["image"]) + im_id = image_info["im_id"] + if batch_size == 1: + if conf_threshold is None and nms_iou_threshold is None: + result = model.predict(im.copy()) + else: + result = model.predict(im, conf_threshold, nms_iou_threshold) + pred = { + "bbox": [ + [c] + [s] + b + for b, s, c in zip(result.boxes, result.scores, result.label_ids) + ], + "bbox_num": len(result.boxes), + "im_id": im_id, + } + eval_metric.update(im_id, pred) + else: + im_list.append(im) + im_id_list.append(im_id) + # If the batch_size is not satisfied, the remaining pictures are formed into a batch + if (i + 1) % batch_size != 0 and i != image_num - 1: + continue + if conf_threshold is None and nms_iou_threshold is None: + results = model.batch_predict(im_list) + else: + model.postprocessor.conf_threshold = conf_threshold + model.postprocessor.nms_threshold = nms_iou_threshold + results = model.batch_predict(im_list) + for k in range(len(im_list)): + pred = { + "bbox": [ + [c] + [s] + b + for b, s, c in zip( + results[k].boxes, results[k].scores, results[k].label_ids + ) + ], + "bbox_num": len(results[k].boxes), + "im_id": im_id_list[k], + } + eval_metric.update(im_id_list[k], pred) + im_list.clear() + im_id_list.clear() + + if i == image_num - 1: + end_time = time.time() + average_inference_time = round( + (end_time - start_time) / (image_num - twenty_percent_image_num), 4 + ) + eval_metric.accumulate() + eval_details = eval_metric.details + scores.update(eval_metric.get()) + scores.update({"average_inference_time(s)": average_inference_time}) + eval_metric.reset() + return scores diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/segmentation.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/segmentation.py new file mode 100755 index 0000000000..7cff221dd4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/segmentation.py @@ -0,0 +1,105 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from tqdm import trange +import numpy as np +import collections +import os +import math +import time + + +def eval_segmentation(model, data_dir, batch_size=1): + import cv2 + from .utils import Cityscapes + from .utils import f1_score, calculate_area, mean_iou, accuracy, kappa + + assert os.path.isdir(data_dir), "The image_file_path:{} is not a directory.".format( + data_dir + ) + eval_dataset = Cityscapes(dataset_root=data_dir, mode="val") + file_list = eval_dataset.file_list + image_num = eval_dataset.num_samples + num_classes = eval_dataset.num_classes + intersect_area_all = 0 + pred_area_all = 0 + label_area_all = 0 + conf_mat_all = [] + twenty_percent_image_num = math.ceil(image_num * 0.2) + start_time = 0 + end_time = 0 + average_inference_time = 0 + im_list = [] + label_list = [] + for image_label_path, i in zip( + file_list, trange(image_num, desc="Inference Progress") + ): + if i == twenty_percent_image_num: + start_time = time.time() + im = cv2.imread(image_label_path[0]) + label = cv2.imread(image_label_path[1], cv2.IMREAD_GRAYSCALE) + label_list.append(label) + if batch_size == 1: + result = model.predict(im) + results = [result] + else: + im_list.append(im) + # If the batch_size is not satisfied, the remaining pictures are formed into a batch + if (i + 1) % batch_size != 0 and i != image_num - 1: + continue + results = model.batch_predict(im_list) + if i == image_num - 1: + end_time = time.time() + average_inference_time = round( + (end_time - start_time) / (image_num - twenty_percent_image_num), 4 + ) + for result, label in zip(results, label_list): + pred = np.array(result.label_map).reshape(result.shape[0], result.shape[1]) + intersect_area, pred_area, label_area = calculate_area( + pred, label, num_classes + ) + intersect_area_all = intersect_area_all + intersect_area + pred_area_all = pred_area_all + pred_area + label_area_all = label_area_all + label_area + im_list.clear() + label_list.clear() + + class_iou, miou = mean_iou(intersect_area_all, pred_area_all, label_area_all) + class_acc, oacc = accuracy(intersect_area_all, pred_area_all) + kappa_res = kappa(intersect_area_all, pred_area_all, label_area_all) + category_f1score = f1_score(intersect_area_all, pred_area_all, label_area_all) + + eval_metrics = collections.OrderedDict( + zip( + [ + "miou", + "category_iou", + "oacc", + "category_acc", + "kappa", + "category_F1-score", + "average_inference_time(s)", + ], + [ + miou, + class_iou, + oacc, + class_acc, + kappa_res, + category_f1score, + average_inference_time, + ], + ) + ) + return eval_metrics diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/__init__.py new file mode 100755 index 0000000000..7289bbe271 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/__init__.py @@ -0,0 +1,23 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import fd_logging +from .util import * +from .coco_metrics import * +from .seg_metrics import * +from .json_results import * +from .map_utils import * +from .coco_utils import * +from .coco import * +from .cityscapes import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/cityscapes.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/cityscapes.py new file mode 100755 index 0000000000..bd39335a87 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/cityscapes.py @@ -0,0 +1,78 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import glob +from . import fd_logging as logging + +# import fd_logging as logging + + +class Cityscapes(object): + """ + Cityscapes dataset `https://www.cityscapes-dataset.com/`. + The folder structure is as follow: + + cityscapes + | + |--leftImg8bit + | |--train + | |--val + | |--test + | + |--gtFine + | |--train + | |--val + | |--test + + Args: + dataset_root (str): Cityscapes dataset directory. + """ + + NUM_CLASSES = 19 + + def __init__(self, dataset_root, mode): + self.dataset_root = dataset_root + self.file_list = list() + mode = mode.lower() + self.mode = mode + self.num_classes = self.NUM_CLASSES + self.ignore_index = 255 + + img_dir = os.path.join(self.dataset_root, "leftImg8bit") + label_dir = os.path.join(self.dataset_root, "gtFine") + if ( + self.dataset_root is None + or not os.path.isdir(self.dataset_root) + or not os.path.isdir(img_dir) + or not os.path.isdir(label_dir) + ): + raise ValueError( + "The dataset is not Found or the folder structure is nonconfoumance." + ) + + label_files = sorted( + glob.glob(os.path.join(label_dir, mode, "*", "*_gtFine_labelTrainIds.png")) + ) + img_files = sorted( + glob.glob(os.path.join(img_dir, mode, "*", "*_leftImg8bit.png")) + ) + + self.file_list = [ + [img_path, label_path] + for img_path, label_path in zip(img_files, label_files) + ] + + self.num_samples = len(self.file_list) + logging.info("{} samples in file {}".format(self.num_samples, img_dir)) diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco.py new file mode 100755 index 0000000000..e4b702cb46 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco.py @@ -0,0 +1,176 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import copy +import os.path as osp +import sys +import numpy as np +from . import fd_logging as logging +from .util import is_pic, get_num_workers + + +class CocoDetection(object): + """读取MSCOCO格式的检测数据集,并对样本进行相应的处理,该格式的数据集同样可以应用到实例分割模型的训练中。 + + Args: + data_dir (str): 数据集所在的目录路径。 + ann_file (str): 数据集的标注文件,为一个独立的json格式文件。 + num_workers (int|str): 数据集中样本在预处理过程中的线程或进程数。默认为'auto'。当设为'auto'时,根据 + 系统的实际CPU核数设置`num_workers`: 如果CPU核数的一半大于8,则`num_workers`为8,否则为CPU核数的一半。 + shuffle (bool): 是否需要对数据集中样本打乱顺序。默认为False。 + allow_empty (bool): 是否加载负样本。默认为False。 + empty_ratio (float): 用于指定负样本占总样本数的比例。如果小于0或大于等于1,则保留全部的负样本。默认为1。 + """ + + def __init__( + self, + data_dir, + ann_file, + num_workers="auto", + shuffle=False, + allow_empty=False, + empty_ratio=1.0, + ): + + from pycocotools.coco import COCO + + self.data_dir = data_dir + self.data_fields = None + self.num_max_boxes = 1000 + self.num_workers = get_num_workers(num_workers) + self.shuffle = shuffle + self.allow_empty = allow_empty + self.empty_ratio = empty_ratio + self.file_list = list() + neg_file_list = list() + self.labels = list() + + coco = COCO(ann_file) + self.coco_gt = coco + img_ids = sorted(coco.getImgIds()) + cat_ids = coco.getCatIds() + catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)}) + cname2clsid = dict( + { + coco.loadCats(catid)[0]["name"]: clsid + for catid, clsid in catid2clsid.items() + } + ) + for label, cid in sorted(cname2clsid.items(), key=lambda d: d[1]): + self.labels.append(label) + logging.info("Starting to read file list from dataset...") + + ct = 0 + for img_id in img_ids: + is_empty = False + img_anno = coco.loadImgs(img_id)[0] + im_fname = osp.join(data_dir, img_anno["file_name"]) + if not is_pic(im_fname): + continue + im_w = float(img_anno["width"]) + im_h = float(img_anno["height"]) + ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False) + instances = coco.loadAnns(ins_anno_ids) + + bboxes = [] + for inst in instances: + x, y, box_w, box_h = inst["bbox"] + x1 = max(0, x) + y1 = max(0, y) + x2 = min(im_w - 1, x1 + max(0, box_w)) + y2 = min(im_h - 1, y1 + max(0, box_h)) + if inst["area"] > 0 and x2 >= x1 and y2 >= y1: + inst["clean_bbox"] = [x1, y1, x2, y2] + bboxes.append(inst) + else: + logging.warning( + "Found an invalid bbox in annotations: " + "im_id: {}, area: {} x1: {}, y1: {}, x2: {}, y2: {}.".format( + img_id, float(inst["area"]), x1, y1, x2, y2 + ) + ) + num_bbox = len(bboxes) + if num_bbox == 0 and not self.allow_empty: + continue + elif num_bbox == 0: + is_empty = True + + gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32) + gt_class = np.zeros((num_bbox, 1), dtype=np.int32) + gt_score = np.ones((num_bbox, 1), dtype=np.float32) + is_crowd = np.zeros((num_bbox, 1), dtype=np.int32) + difficult = np.zeros((num_bbox, 1), dtype=np.int32) + gt_poly = [None] * num_bbox + + has_segmentation = False + for i, box in reversed(list(enumerate(bboxes))): + catid = box["category_id"] + gt_class[i][0] = 
catid2clsid[catid] + gt_bbox[i, :] = box["clean_bbox"] + is_crowd[i][0] = box["iscrowd"] + if "segmentation" in box and box["iscrowd"] == 1: + gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] + elif "segmentation" in box and box["segmentation"]: + if ( + not np.array(box["segmentation"], dtype=object).size > 0 + and not self.allow_empty + ): + gt_poly.pop(i) + is_crowd = np.delete(is_crowd, i) + gt_class = np.delete(gt_class, i) + gt_bbox = np.delete(gt_bbox, i) + else: + gt_poly[i] = box["segmentation"] + has_segmentation = True + if has_segmentation and not any(gt_poly) and not self.allow_empty: + continue + + im_info = { + "im_id": np.array([img_id]).astype("int32"), + "image_shape": np.array([im_h, im_w]).astype("int32"), + } + label_info = { + "is_crowd": is_crowd, + "gt_class": gt_class, + "gt_bbox": gt_bbox, + "gt_score": gt_score, + "gt_poly": gt_poly, + "difficult": difficult, + } + + if is_empty: + neg_file_list.append({"image": im_fname, **im_info, **label_info}) + else: + self.file_list.append({"image": im_fname, **im_info, **label_info}) + ct += 1 + + self.num_max_boxes = max(self.num_max_boxes, len(instances)) + + if not ct: + logging.error("No coco record found in %s' % (ann_file)", exit=True) + self.pos_num = len(self.file_list) + if self.allow_empty and neg_file_list: + self.file_list += self._sample_empty(neg_file_list) + logging.info( + "{} samples in file {}, including {} positive samples and {} negative samples.".format( + len(self.file_list), + ann_file, + self.pos_num, + len(self.file_list) - self.pos_num, + ) + ) + self.num_samples = len(self.file_list) + + self._epoch = 0 diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_metrics.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_metrics.py new file mode 100755 index 0000000000..18dfdc3bfa --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_metrics.py @@ -0,0 +1,90 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
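+
+# --- Illustrative flow sketch (hedged): `predictions` below stands for the
+# {"bbox": ..., "bbox_num": ..., "im_id": ...} dict built by eval_detection; the loop
+# variable names are hypothetical and only illustrate how COCOMetric is driven.
+#
+#   metric = COCOMetric(coco_gt=eval_dataset.coco_gt)
+#   for im_id, predictions in per_image_results:  # one update per evaluated image
+#       metric.update(im_id, predictions)
+#   metric.accumulate()  # runs the COCO API evaluation over the collected results
+#   print(metric.get())  # e.g. {"bbox_mmap": ...}
+#   metric.reset()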
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import sys +from collections import OrderedDict +from .coco_utils import get_infer_results, cocoapi_eval + + +class COCOMetric(object): + def __init__(self, coco_gt, **kwargs): + self.clsid2catid = { + i: cat["id"] for i, cat in enumerate(coco_gt.loadCats(coco_gt.getCatIds())) + } + self.coco_gt = coco_gt + self.classwise = kwargs.get("classwise", False) + self.bias = 0 + self.reset() + + def reset(self): + # only bbox and mask evaluation support currently + self.details = { + "gt": copy.deepcopy(self.coco_gt.dataset), + "bbox": [], + "mask": [], + } + self.eval_stats = {} + + def update(self, im_id, outputs): + outs = {} + # outputs Tensor -> numpy.ndarray + for k, v in outputs.items(): + outs[k] = v + + outs["im_id"] = im_id + infer_results = get_infer_results(outs, self.clsid2catid, bias=self.bias) + self.details["bbox"] += infer_results["bbox"] if "bbox" in infer_results else [] + self.details["mask"] += infer_results["mask"] if "mask" in infer_results else [] + + def accumulate(self): + if len(self.details["bbox"]) > 0: + bbox_stats = cocoapi_eval( + copy.deepcopy(self.details["bbox"]), + "bbox", + coco_gt=self.coco_gt, + classwise=self.classwise, + ) + self.eval_stats["bbox"] = bbox_stats + sys.stdout.flush() + + if len(self.details["mask"]) > 0: + seg_stats = cocoapi_eval( + copy.deepcopy(self.details["mask"]), + "segm", + coco_gt=self.coco_gt, + classwise=self.classwise, + ) + self.eval_stats["mask"] = seg_stats + sys.stdout.flush() + + def log(self): + pass + + def get(self): + if "bbox" not in self.eval_stats: + return {"bbox_mmap": 0.0} + if "mask" in self.eval_stats: + return OrderedDict( + zip( + ["bbox_mmap", "segm_mmap"], + [self.eval_stats["bbox"][0], self.eval_stats["mask"][0]], + ) + ) + else: + return {"bbox_mmap": self.eval_stats["bbox"][0]} diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_utils.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_utils.py new file mode 100755 index 0000000000..aeedac7500 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/coco_utils.py @@ -0,0 +1,233 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import numpy as np +from .map_utils import draw_pr_curve +from .json_results import ( + get_det_res, + get_det_poly_res, + get_seg_res, + get_solov2_segm_res, +) +from . import fd_logging as logging +import copy + + +def loadRes(coco_obj, anns): + """ + Load result file and return a result api object. + :param resFile (str) : file name of result file + :return: res (obj) : result api object + """ + + # This function has the same functionality as pycocotools.COCO.loadRes, + # except that the input anns is list of results rather than a json file. 
+ # Refer to
+ # https://github.com/cocodataset/cocoapi/blob/8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9/PythonAPI/pycocotools/coco.py#L305.
+
+ # matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
+ # or matplotlib.backends is imported for the first time
+ # pycocotools import matplotlib
+ import matplotlib
+
+ matplotlib.use("Agg")
+ from pycocotools.coco import COCO
+ import pycocotools.mask as maskUtils
+ import time
+
+ res = COCO()
+ res.dataset["images"] = [img for img in coco_obj.dataset["images"]]
+
+ tic = time.time()
+ assert isinstance(anns, list), "results is not an array of objects"
+ annsImgIds = [ann["image_id"] for ann in anns]
+ assert set(annsImgIds) == (
+ set(annsImgIds) & set(coco_obj.getImgIds())
+ ), "Results do not correspond to current coco set"
+ if "caption" in anns[0]:
+ imgIds = set([img["id"] for img in res.dataset["images"]]) & set(
+ [ann["image_id"] for ann in anns]
+ )
+ res.dataset["images"] = [
+ img for img in res.dataset["images"] if img["id"] in imgIds
+ ]
+ for id, ann in enumerate(anns):
+ ann["id"] = id + 1
+ elif "bbox" in anns[0] and not anns[0]["bbox"] == []:
+ res.dataset["categories"] = copy.deepcopy(coco_obj.dataset["categories"])
+ for id, ann in enumerate(anns):
+ bb = ann["bbox"]
+ x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
+ if not "segmentation" in ann:
+ ann["segmentation"] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
+ ann["area"] = bb[2] * bb[3]
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+ elif "segmentation" in anns[0]:
+ res.dataset["categories"] = copy.deepcopy(coco_obj.dataset["categories"])
+ for id, ann in enumerate(anns):
+ # now only support compressed RLE format as segmentation results
+ ann["area"] = maskUtils.area(ann["segmentation"])
+ if not "bbox" in ann:
+ ann["bbox"] = maskUtils.toBbox(ann["segmentation"])
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+ elif "keypoints" in anns[0]:
+ res.dataset["categories"] = copy.deepcopy(coco_obj.dataset["categories"])
+ for id, ann in enumerate(anns):
+ s = ann["keypoints"]
+ x = s[0::3]
+ y = s[1::3]
+ x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
+ ann["area"] = (x1 - x0) * (y1 - y0)
+ ann["id"] = id + 1
+ ann["bbox"] = [x0, y0, x1 - x0, y1 - y0]
+
+ res.dataset["annotations"] = anns
+ res.createIndex()
+ return res
+
+
+def get_infer_results(outs, catid, bias=0):
+ """
+ Get result at the stage of inference.
+ The output format is a dictionary containing bbox or mask results.
+
+ For example, bbox result is a list and each element contains
+ image_id, category_id, bbox and score.
+ """
+ if outs is None or len(outs) == 0:
+ raise ValueError(
+ "The number of valid detection results is zero. Please use a reasonable model and check the input data."
+ )
+
+ im_id = outs["im_id"]
+
+ infer_res = {}
+ if "bbox" in outs:
+ if len(outs["bbox"]) > 0 and len(outs["bbox"][0]) > 6:
+ infer_res["bbox"] = get_det_poly_res(
+ outs["bbox"], outs["bbox_num"], im_id, catid, bias=bias
+ )
+ else:
+ infer_res["bbox"] = get_det_res(
+ outs["bbox"], outs["bbox_num"], im_id, catid, bias=bias
+ )
+
+ if "mask" in outs:
+ # mask post process
+ infer_res["mask"] = get_seg_res(
+ outs["mask"], outs["bbox"], outs["bbox_num"], im_id, catid
+ )
+
+ if "segm" in outs:
+ infer_res["segm"] = get_solov2_segm_res(outs, im_id, catid)
+
+ return infer_res
+
+
+def cocoapi_eval(
+ anns,
+ style,
+ coco_gt=None,
+ anno_file=None,
+ max_dets=(100, 300, 1000),
+ classwise=False,
+):
+ """
+ Args:
+ anns: Evaluation result.
+ style (str): COCOeval style, can be `bbox`, `segm` and `proposal`.
+ coco_gt (str): Whether to load COCOAPI through anno_file, + eg: coco_gt = COCO(anno_file) + anno_file (str): COCO annotations file. + max_dets (tuple): COCO evaluation maxDets. + classwise (bool): Whether per-category AP and draw P-R Curve or not. + """ + assert coco_gt is not None or anno_file is not None + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + + if coco_gt is None: + coco_gt = COCO(anno_file) + logging.info("Start evaluate...") + coco_dt = loadRes(coco_gt, anns) + if style == "proposal": + coco_eval = COCOeval(coco_gt, coco_dt, "bbox") + coco_eval.params.useCats = 0 + coco_eval.params.maxDets = list(max_dets) + else: + coco_eval = COCOeval(coco_gt, coco_dt, style) + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + if classwise: + # Compute per-category AP and PR curve + try: + from terminaltables import AsciiTable + except Exception as e: + logging.error( + "terminaltables not found, plaese install terminaltables. " + "for example: `pip install terminaltables`." + ) + raise e + precisions = coco_eval.eval["precision"] + cat_ids = coco_gt.getCatIds() + # precision: (iou, recall, cls, area range, max dets) + assert len(cat_ids) == precisions.shape[2] + results_per_category = [] + for idx, catId in enumerate(cat_ids): + # area range index 0: all area ranges + # max dets index -1: typically 100 per image + nm = coco_gt.loadCats(catId)[0] + precision = precisions[:, :, idx, 0, -1] + precision = precision[precision > -1] + if precision.size: + ap = np.mean(precision) + else: + ap = float("nan") + results_per_category.append((str(nm["name"]), "{:0.3f}".format(float(ap)))) + pr_array = precisions[0, :, idx, 0, 2] + recall_array = np.arange(0.0, 1.01, 0.01) + draw_pr_curve( + pr_array, + recall_array, + out_dir=style + "_pr_curve", + file_name="{}_precision_recall_curve.jpg".format(nm["name"]), + ) + + num_columns = min(6, len(results_per_category) * 2) + + import itertools + + results_flatten = list(itertools.chain(*results_per_category)) + headers = ["category", "AP"] * (num_columns // 2) + results_2d = itertools.zip_longest( + *[results_flatten[i::num_columns] for i in range(num_columns)] + ) + table_data = [headers] + table_data += [result for result in results_2d] + table = AsciiTable(table_data) + logging.info("Per-category of {} AP: \n{}".format(style, table.table)) + logging.info( + "per-category PR curve has output to {} folder.".format(style + "_pr_curve") + ) + # flush coco evaluation result + sys.stdout.flush() + return coco_eval.stats diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/fd_logging.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/fd_logging.py new file mode 100755 index 0000000000..7f8b27444f --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/fd_logging.py @@ -0,0 +1,61 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
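Taken together, coco_metrics.py and coco_utils.py support an evaluation loop along the lines of the sketch below. The annotation path is a placeholder, the detector call is faked with one dummy box, and the `outputs` layout (an N x 6 `bbox` array of `[class_id, score, x1, y1, x2, y2]` plus `bbox_num`, with a plain integer `im_id`) is an assumption inferred from `get_det_res`; adapt it to whatever the real model wrapper returns.

```python
import numpy as np
from pycocotools.coco import COCO
from ultrainfer.vision.evaluation.utils.coco_metrics import COCOMetric

coco_gt = COCO("annotations/instances_val2017.json")  # hypothetical annotation file
metric = COCOMetric(coco_gt, classwise=False)

for img_id in coco_gt.getImgIds()[:10]:  # small illustrative subset
    # Stand-in for real detector outputs converted to numpy:
    # one box of class 0 with score 0.9 in [x1, y1, x2, y2] order.
    outputs = {
        "bbox": np.array([[0, 0.9, 10.0, 20.0, 110.0, 220.0]], dtype="float32"),
        "bbox_num": 1,
    }
    metric.update(img_id, outputs)

metric.accumulate()
print(metric.get())  # e.g. {"bbox_mmap": ...}
```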
+ +import time +import os +import sys +import colorama +from colorama import init + +init(autoreset=True) +levels = {0: "ERROR", 1: "WARNING", 2: "INFO", 3: "DEBUG"} + + +def log(level=2, message="", use_color=False): + current_time = time.time() + time_array = time.localtime(current_time) + current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array) + if use_color: + print( + "\033[1;31;40m{} [{}]\t{}\033[0m".format( + current_time, levels[level], message + ) + .encode("utf-8") + .decode("latin1") + ) + else: + print( + "{} [{}]\t{}".format(current_time, levels[level], message) + .encode("utf-8") + .decode("latin1") + ) + sys.stdout.flush() + + +def debug(message="", use_color=False): + log(level=3, message=message, use_color=use_color) + + +def info(message="", use_color=False): + log(level=2, message=message, use_color=use_color) + + +def warning(message="", use_color=True): + log(level=1, message=message, use_color=use_color) + + +def error(message="", use_color=True, exit=True): + log(level=0, message=message, use_color=use_color) + if exit: + sys.exit(-1) diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/json_results.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/json_results.py new file mode 100755 index 0000000000..3633cbaf08 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/json_results.py @@ -0,0 +1,162 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
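The fd_logging helpers above are thin wrappers around print with a timestamp and a level tag; error() additionally calls sys.exit(-1) unless exit=False is passed. A quick usage sketch (the import path assumes the package layout introduced by this patch):

```python
from ultrainfer.vision.evaluation.utils import fd_logging as logging

logging.info("loading annotations")                # "<timestamp> [INFO]  loading annotations"
logging.warning("falling back to CPU")             # printed with ANSI color codes
logging.error("model file not found", exit=False)  # pass exit=False to keep the process alive
```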
+ +import six +import numpy as np + + +def get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0): + det_res = [] + for i in range(bbox_nums): + cur_image_id = int(image_id) + dt = bboxes[i] + num_id, score, xmin, ymin, xmax, ymax = dt + if int(num_id) < 0: + continue + category_id = label_to_cat_id_map[int(num_id)] + w = xmax - xmin + bias + h = ymax - ymin + bias + bbox = [xmin, ymin, w, h] + dt_res = { + "image_id": cur_image_id, + "category_id": category_id, + "bbox": bbox, + "score": score, + } + det_res.append(dt_res) + return det_res + + +def get_det_poly_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0): + det_res = [] + k = 0 + for i in range(len(bbox_nums)): + cur_image_id = int(image_id[i][0]) + det_nums = bbox_nums[i] + for j in range(det_nums): + dt = bboxes[k] + k = k + 1 + num_id, score, x1, y1, x2, y2, x3, y3, x4, y4 = dt.tolist() + if int(num_id) < 0: + continue + category_id = label_to_cat_id_map[int(num_id)] + rbox = [x1, y1, x2, y2, x3, y3, x4, y4] + dt_res = { + "image_id": cur_image_id, + "category_id": category_id, + "bbox": rbox, + "score": score, + } + det_res.append(dt_res) + return det_res + + +def strip_mask(mask): + row = mask[0, 0, :] + col = mask[0, :, 0] + im_h = len(col) - np.count_nonzero(col == -1) + im_w = len(row) - np.count_nonzero(row == -1) + return mask[:, :im_h, :im_w] + + +def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map): + import pycocotools.mask as mask_util + + seg_res = [] + k = 0 + for i in range(len(mask_nums)): + cur_image_id = int(image_id[i][0]) + det_nums = mask_nums[i] + mask_i = masks[k : k + det_nums] + mask_i = strip_mask(mask_i) + for j in range(det_nums): + mask = mask_i[j].astype(np.uint8) + score = float(bboxes[k][1]) + label = int(bboxes[k][0]) + k = k + 1 + if label == -1: + continue + cat_id = label_to_cat_id_map[label] + rle = mask_util.encode( + np.array(mask[:, :, None], order="F", dtype="uint8") + )[0] + if six.PY3: + if "counts" in rle: + rle["counts"] = rle["counts"].decode("utf8") + sg_res = { + "image_id": cur_image_id, + "category_id": cat_id, + "segmentation": rle, + "score": score, + } + seg_res.append(sg_res) + return seg_res + + +def get_solov2_segm_res(results, image_id, num_id_to_cat_id_map): + import pycocotools.mask as mask_util + + segm_res = [] + # for each batch + segms = results["segm"].astype(np.uint8) + clsid_labels = results["cate_label"] + clsid_scores = results["cate_score"] + lengths = segms.shape[0] + im_id = int(image_id[0][0]) + if lengths == 0 or segms is None: + return None + # for each sample + for i in range(lengths - 1): + clsid = int(clsid_labels[i]) + catid = num_id_to_cat_id_map[clsid] + score = float(clsid_scores[i]) + mask = segms[i] + segm = mask_util.encode(np.array(mask[:, :, np.newaxis], order="F"))[0] + segm["counts"] = segm["counts"].decode("utf8") + coco_res = { + "image_id": im_id, + "category_id": catid, + "segmentation": segm, + "score": score, + } + segm_res.append(coco_res) + return segm_res + + +def get_keypoint_res(results, im_id): + anns = [] + preds = results["keypoint"] + for idx in range(im_id.shape[0]): + image_id = im_id[idx].item() + kpts, scores = preds[idx] + for kpt, score in zip(kpts, scores): + kpt = kpt.flatten() + ann = { + "image_id": image_id, + "category_id": 1, # XXX hard code + "keypoints": kpt.tolist(), + "score": float(score), + } + x = kpt[0::3] + y = kpt[1::3] + x0, x1, y0, y1 = ( + np.min(x).item(), + np.max(x).item(), + np.min(y).item(), + np.max(y).item(), + ) + ann["area"] = (x1 - x0) * (y1 - y0) 
+ ann["bbox"] = [x0, y0, x1 - x0, y1 - y0] + anns.append(ann) + return anns diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/map_utils.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/map_utils.py new file mode 100755 index 0000000000..28d9c56eeb --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/map_utils.py @@ -0,0 +1,42 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import os + + +def draw_pr_curve( + precision, + recall, + iou=0.5, + out_dir="pr_curve", + file_name="precision_recall_curve.jpg", +): + if not os.path.exists(out_dir): + os.makedirs(out_dir) + output_path = os.path.join(out_dir, file_name) + try: + import matplotlib.pyplot as plt + except Exception as e: + # logger.error('Matplotlib not found, plaese install matplotlib.' + # 'for example: `pip install matplotlib`.') + raise e + plt.cla() + plt.figure("P-R Curve") + plt.title("Precision/Recall Curve(IoU={})".format(iou)) + plt.xlabel("Recall") + plt.ylabel("Precision") + plt.grid(True) + plt.plot(recall, precision) + plt.savefig(output_path) diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/seg_metrics.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/seg_metrics.py new file mode 100755 index 0000000000..8dbc2412ca --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/seg_metrics.py @@ -0,0 +1,144 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + + +def f1_score(intersect_area, pred_area, label_area): + class_f1_sco = [] + for i in range(len(intersect_area)): + if pred_area[i] + label_area[i] == 0: + f1_sco = 0 + elif pred_area[i] == 0: + f1_sco = 0 + else: + prec = intersect_area[i] / pred_area[i] + rec = intersect_area[i] / label_area[i] + f1_sco = 2 * prec * rec / (prec + rec) + class_f1_sco.append(f1_sco) + return np.array(class_f1_sco) + + +def calculate_area(pred, label, num_classes, ignore_index=255): + """ + Calculate intersect, prediction and label area + + Args: + pred (np.ndarray): The prediction by model. + label (np.ndarray): The ground truth of image. + num_classes (int): The unique number of target classes. + ignore_index (int): Specifies a target value that is ignored. Default: 255. + + Returns: + Numpy Array: The intersection area of prediction and the ground on all class. 
+ Numpy Array: The prediction area on all class. + Numpy Array: The ground truth area on all class + """ + if not pred.shape == label.shape: + raise ValueError( + "Shape of `pred` and `label should be equal, " + "but there are {} and {}.".format(pred.shape, label.shape) + ) + + mask = label != ignore_index + pred = pred + 1 + label = label + 1 + pred = pred * mask + label = label * mask + pred = np.eye(num_classes + 1)[pred] + label = np.eye(num_classes + 1)[label] + pred = pred[:, 1:] + label = label[:, 1:] + + pred_area = [] + label_area = [] + intersect_area = [] + + for i in range(num_classes): + pred_i = pred[:, :, i] + label_i = label[:, :, i] + pred_area_i = np.sum(pred_i) + label_area_i = np.sum(label_i) + intersect_area_i = np.sum(pred_i * label_i) + pred_area.append(pred_area_i) + label_area.append(label_area_i) + intersect_area.append(intersect_area_i) + return np.array(intersect_area), np.array(pred_area), np.array(label_area) + + +def mean_iou(intersect_area, pred_area, label_area): + """ + Calculate iou. + + Args: + intersect_area (np.ndarray): The intersection area of prediction and ground truth on all classes. + pred_area (np.ndarray): The prediction area on all classes. + label_area (np.ndarray): The ground truth area on all classes. + + Returns: + np.ndarray: iou on all classes. + float: mean iou of all classes. + """ + union = pred_area + label_area - intersect_area + class_iou = [] + for i in range(len(intersect_area)): + if union[i] == 0: + iou = 0 + else: + iou = intersect_area[i] / union[i] + class_iou.append(iou) + miou = np.mean(class_iou) + return np.array(class_iou), miou + + +def accuracy(intersect_area, pred_area): + """ + Calculate accuracy + + Args: + intersect_area (np.ndarray): The intersection area of prediction and ground truth on all classes.. + pred_area (np.ndarray): The prediction area on all classes. + + Returns: + np.ndarray: accuracy on all classes. + float: mean accuracy. + """ + class_acc = [] + for i in range(len(intersect_area)): + if pred_area[i] == 0: + acc = 0 + else: + acc = intersect_area[i] / pred_area[i] + class_acc.append(acc) + macc = np.sum(intersect_area) / np.sum(pred_area) + return np.array(class_acc), macc + + +def kappa(intersect_area, pred_area, label_area): + """ + Calculate kappa coefficient + + Args: + intersect_area (np.ndarray): The intersection area of prediction and ground truth on all classes.. + pred_area (np.ndarray): The prediction area on all classes. + label_area (np.ndarray): The ground truth area on all classes. + + Returns: + float: kappa coefficient. + """ + total_area = np.sum(label_area) + po = np.sum(intersect_area) / total_area + pe = np.sum(pred_area * label_area) / (total_area * total_area) + kappa = (po - pe) / (1 - pe) + return kappa diff --git a/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/util.py b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/util.py new file mode 100755 index 0000000000..8ecabb98cb --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/evaluation/utils/util.py @@ -0,0 +1,34 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import platform +import multiprocessing as mp + + +def is_pic(img_name): + valid_suffix = ["JPEG", "jpeg", "JPG", "jpg", "BMP", "bmp", "PNG", "png"] + suffix = img_name.split(".")[-1] + if suffix not in valid_suffix: + return False + return True + + +def get_num_workers(num_workers): + if not platform.system() == "Linux": + # Dataloader with multi-process model is not supported + # on MacOS and Windows currently. + return 0 + if num_workers == "auto": + num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 2 else 2 + return num_workers diff --git a/libs/ultrainfer/python/ultrainfer/vision/facealign/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/facealign/__init__.py new file mode 100755 index 0000000000..95fb7ec058 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facealign/__init__.py @@ -0,0 +1,18 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .contrib.pfld import PFLD +from .contrib.pipnet import PIPNet +from .contrib.face_landmark_1000 import FaceLandmark1000 diff --git a/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/face_landmark_1000.py b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/face_landmark_1000.py new file mode 100755 index 0000000000..373847dd43 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/face_landmark_1000.py @@ -0,0 +1,76 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class FaceLandmark1000(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a face alignment model exported by FaceLandmark1000. + + :param model_file: (str)Path of model file, e.g ./FaceLandmark1000.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + + super(FaceLandmark1000, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "FaceLandmark1000 only support model format of ModelFormat.ONNX now." + self._model = C.vision.facealign.FaceLandmark1000( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "FaceLandmark1000 initialize failed." + + def predict(self, input_image): + """Detect an input image landmarks + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: FaceAlignmentResult + """ + + return self._model.predict(input_image) + + @property + def size(self): + """ + Returns the preprocess image size, default (128, 128) + """ + return self._model.size + + @size.setter + def size(self, wh): + """ + Set the preprocess image size, default (128, 128) + """ + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh diff --git a/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pfld.py b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pfld.py new file mode 100755 index 0000000000..b11d272de9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pfld.py @@ -0,0 +1,76 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class PFLD(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a face alignment model exported by PFLD. + + :param model_file: (str)Path of model file, e.g pfld/pfld-106-v3.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + + super(PFLD, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "PFLD only support model format of ModelFormat.ONNX now." + self._model = C.vision.facealign.PFLD( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "PFLD initialize failed." + + def predict(self, input_image): + """Detect an input image landmarks + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: FaceAlignmentResult + """ + + return self._model.predict(input_image) + + @property + def size(self): + """ + Returns the preprocess image size, default (112, 112) + """ + return self._model.size + + @size.setter + def size(self, wh): + """ + Set the preprocess image size, default (112, 112) + """ + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh diff --git a/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pipnet.py b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pipnet.py new file mode 100755 index 0000000000..28e7d0b280 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facealign/contrib/pipnet.py @@ -0,0 +1,118 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class PIPNet(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a face alignment model exported by PIPNet. 
+ + :param model_file: (str)Path of model file, e.g ./PIPNet.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + + super(PIPNet, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "PIPNet only support model format of ModelFormat.ONNX now." + self._model = C.vision.facealign.PIPNet( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "PIPNet initialize failed." + + def predict(self, input_image): + """Detect an input image landmarks + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: FaceAlignmentResult + """ + + return self._model.predict(input_image) + + @property + def size(self): + """ + Returns the preprocess image size, default (256, 256) + """ + return self._model.size + + @property + def mean_vals(self): + """ + Returns the mean value of normlization, default mean_vals = [0.485f, 0.456f, 0.406f]; + """ + return self._model.mean_vals + + @property + def std_vals(self): + """ + Returns the std value of normlization, default std_vals = [0.229f, 0.224f, 0.225f]; + """ + return self._model.std_vals + + @property + def num_landmarks(self): + """ + Returns the number of landmarks + """ + return self._model.num_landmarks + + @size.setter + def size(self, wh): + """ + Set the preprocess image size, default (256, 256) + """ + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @mean_vals.setter + def mean_vals(self, value): + assert isinstance( + value, list + ), "The value to set `mean_vals` must be type of list." + self._model.mean_vals = value + + @std_vals.setter + def std_vals(self, value): + assert isinstance( + value, list + ), "The value to set `std_vals` must be type of list." + self._model.std_vals = value + + @num_landmarks.setter + def num_landmarks(self, value): + assert isinstance( + value, int + ), "The value to set `std_vals` must be type of int." + self._model.num_landmarks = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/__init__.py new file mode 100755 index 0000000000..d60a5ee076 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/__init__.py @@ -0,0 +1,22 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
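As a usage sketch for the face-alignment wrappers above (PIPNet shown here): the model file name is a hypothetical ONNX export, and the landmark count must match whatever the exported network actually predicts.

```python
import cv2
from ultrainfer.vision.facealign import PIPNet

model = PIPNet("pipnet_resnet18_300w.onnx")  # hypothetical ONNX export
model.size = [256, 256]                      # preprocess size, (width, height)
model.num_landmarks = 68                     # must match the exported model

im = cv2.imread("face.jpg")
result = model.predict(im)                   # FaceAlignmentResult with the landmarks
print(result)
```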
+ +from __future__ import absolute_import +from .contrib.yolov5face import YOLOv5Face +from .contrib.yolov7face import * +from .contrib.centerface import * +from .contrib.blazeface import * +from .contrib.retinaface import RetinaFace +from .contrib.scrfd import SCRFD +from .contrib.ultraface import UltraFace diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/blazeface.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/blazeface.py new file mode 100755 index 0000000000..00ce75b825 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/blazeface.py @@ -0,0 +1,146 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class BlazeFacePreprocessor: + def __init__(self): + """Create a preprocessor for BlazeFace""" + self._preprocessor = C.vision.facedet.BlazeFacePreprocessor() + + def run(self, input_ims): + """Preprocess input images for BlazeFace + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def is_scale_(self): + """ + is_scale_ for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_ + + @is_scale_.setter + def is_scale_(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_` must be type of bool." 
+ self._preprocessor.is_scale_ = value + + +class BlazeFacePostprocessor: + def __init__(self): + """Create a postprocessor for BlazeFace""" + self._postprocessor = C.vision.facedet.BlazeFacePostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for BlazeFace + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.3 + """ + return self._postprocessor.nms_threshold + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + +class BlazeFace(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + config_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a BlazeFace model exported by BlazeFace. + + :param model_file: (str)Path of model file, e.g ./Blazeface.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(BlazeFace, self).__init__(runtime_option) + + self._model = C.vision.facedet.BlazeFace( + model_file, params_file, config_file, self._runtime_option, model_format + ) + + assert self.initialized, "BlazeFace initialize failed." 
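Unlike the older contrib wrappers, BlazeFace follows the preprocessor/postprocessor pattern, so thresholds are set on the postprocessor rather than passed to predict(). A sketch with hypothetical Paddle inference files; the attribute names on the bound postprocessor are assumed to mirror the Python wrapper above.

```python
import cv2
from ultrainfer.vision.facedet import BlazeFace

model = BlazeFace(
    "blazeface/inference.pdmodel",    # hypothetical exported model
    "blazeface/inference.pdiparams",
    "blazeface/inference.yml",
)
model.postprocessor.conf_threshold = 0.6
model.postprocessor.nms_threshold = 0.3

im = cv2.imread("group_photo.jpg")
result = model.predict(im)            # FaceDetectionResult
```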
+ + def predict(self, input_image): + """Detect the location and key points of human faces from an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: FaceDetectionResult + """ + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of FaceDetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get BlazefacePreprocessor object of the loaded model + + :return BlazefacePreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get BlazefacePostprocessor object of the loaded model + + :return BlazefacePostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/centerface.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/centerface.py new file mode 100755 index 0000000000..6bda7fb666 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/centerface.py @@ -0,0 +1,150 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class CenterFacePreprocessor: + def __init__(self): + """Create a preprocessor for CenterFace""" + self._preprocessor = C.vision.facedet.CenterFacePreprocessor() + + def run(self, input_ims): + """Preprocess input images for CenterFace + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." 
+ assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + +class CenterFacePostprocessor: + def __init__(self): + """Create a postprocessor for CenterFace""" + self._postprocessor = C.vision.facedet.CenterFacePostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for CenterFace + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.3 + """ + return self._postprocessor.nms_threshold + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + +class CenterFace(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a CenterFace model exported by CenterFace. + + :param model_file: (str)Path of model file, e.g ./CenterFace.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(CenterFace, self).__init__(runtime_option) + + self._model = C.vision.facedet.CenterFace( + model_file, params_file, self._runtime_option, model_format + ) + + assert self.initialized, "CenterFace initialize failed." 
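CenterFace uses the same preprocessor/postprocessor split and also supports batched inference via batch_predict(). A sketch with a hypothetical ONNX export:

```python
import cv2
from ultrainfer.vision.facedet import CenterFace

model = CenterFace("CenterFace.onnx")   # hypothetical ONNX export
model.preprocessor.size = [640, 640]    # resize target, (width, height)

images = [cv2.imread("a.jpg"), cv2.imread("b.jpg")]
results = model.batch_predict(images)   # one FaceDetectionResult per image
for res in results:
    print(res)
```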
+ + def predict(self, input_image): + """Detect the location and key points of human faces from an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: FaceDetectionResult + """ + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get CenterFacePreprocessor object of the loaded model + + :return CenterFacePreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get CenterFacePostprocessor object of the loaded model + + :return CenterFacePostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/retinaface.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/retinaface.py new file mode 100755 index 0000000000..f3e72cfb0d --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/retinaface.py @@ -0,0 +1,134 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class RetinaFace(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a RetinaFace model exported by RetinaFace. + + :param model_file: (str)Path of model file, e.g ./retinaface.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(RetinaFace, self).__init__(runtime_option) + + self._model = C.vision.facedet.RetinaFace( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "RetinaFace initialize failed." 
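All of these wrappers accept an optional runtime_option to choose the backend and device. The sketch below assumes ultrainfer.RuntimeOption keeps the FastDeploy-style use_cpu()/use_gpu() methods, which this patch excerpt does not confirm; check ultrainfer/runtime.py for the actual API.

```python
import ultrainfer as ui
from ultrainfer.vision.facedet import RetinaFace

option = ui.RuntimeOption()
option.use_cpu()   # assumed method name; see ultrainfer/runtime.py for the real API

model = RetinaFace(
    "Pytorch_RetinaFace_mobile0.25-640-640.onnx",  # hypothetical ONNX export
    runtime_option=option,
)
```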
+ + def predict(self, input_image, conf_threshold=0.7, nms_iou_threshold=0.3): + """Detect the location and key points of human faces from an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.7 + :param nms_iou_threshold: iou threashold for NMS, default is 0.3 + :return: FaceDetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [640, 480]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default (640, 640) + """ + return self._model.size + + @property + def variance(self): + """ + Argument for image postprocessing step, variance in RetinaFace's prior-box(anchor) generate process, default (0.1, 0.2) + """ + return self._model.variance + + @property + def downsample_strides(self): + """ + Argument for image postprocessing step, downsample strides (namely, steps) for RetinaFace to generate anchors, will take (8,16,32) as default values + """ + return self._model.downsample_strides + + @property + def min_sizes(self): + """ + Argument for image postprocessing step, min sizes, width and height for each anchor, default min_sizes = [[16, 32], [64, 128], [256, 512]] + """ + return self._model.min_sizes + + @property + def landmarks_per_face(self): + """ + Argument for image postprocessing step, landmarks_per_face, default 5 in RetinaFace + """ + return self._model.landmarks_per_face + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @variance.setter + def variance(self, value): + assert isinstance( + value, (list, tuple) + ), "The value to set `variance` must be type of tuple or list." + assert ( + len(value) == 2 + ), "The value to set `variance` must contatins 2 elements".format(len(value)) + self._model.variance = value + + @downsample_strides.setter + def downsample_strides(self, value): + assert isinstance( + value, list + ), "The value to set `downsample_strides` must be type of list." + self._model.downsample_strides = value + + @min_sizes.setter + def min_sizes(self, value): + assert isinstance( + value, list + ), "The value to set `min_sizes` must be type of list." + self._model.min_sizes = value + + @landmarks_per_face.setter + def landmarks_per_face(self, value): + assert isinstance( + value, int + ), "The value to set `landmarks_per_face` must be type of int." + self._model.landmarks_per_face = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/scrfd.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/scrfd.py new file mode 100755 index 0000000000..f6d39f40a4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/scrfd.py @@ -0,0 +1,216 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class SCRFD(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a SCRFD model exported by SCRFD. + + :param model_file: (str)Path of model file, e.g ./scrfd.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(SCRFD, self).__init__(runtime_option) + + self._model = C.vision.facedet.SCRFD( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "SCRFD initialize failed." + + def predict(self, input_image, conf_threshold=0.7, nms_iou_threshold=0.3): + """Detect the location and key points of human faces from an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.7 + :param nms_iou_threshold: iou threashold for NMS, default is 0.3 + :return: FaceDetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._model.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+ """ + self._model.disable_permute() + + # 一些跟SCRFD模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [640, 640]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default (640, 640) + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def downsample_strides(self): + """ + Argument for image postprocessing step, + downsample strides (namely, steps) for SCRFD to generate anchors, + will take (8,16,32) as default values + """ + return self._model.downsample_strides + + @property + def landmarks_per_face(self): + """ + Argument for image postprocessing step, landmarks_per_face, default 5 in SCRFD + """ + return self._model.landmarks_per_face + + @property + def use_kps(self): + """ + Argument for image postprocessing step, + the outputs of onnx file with key points features or not, default true + """ + return self._model.use_kps + + @property + def max_nms(self): + """ + Argument for image postprocessing step, the upperbond number of boxes processed by nms, default 30000 + """ + return self._model.max_nms + + @property + def num_anchors(self): + """ + Argument for image postprocessing step, anchor number of each stride, default 2 + """ + return self._model.num_anchors + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value + + @downsample_strides.setter + def downsample_strides(self, value): + assert isinstance( + value, list + ), "The value to set `downsample_strides` must be type of list." 
+ self._model.downsample_strides = value + + @landmarks_per_face.setter + def landmarks_per_face(self, value): + assert isinstance( + value, int + ), "The value to set `landmarks_per_face` must be type of int." + self._model.landmarks_per_face = value + + @use_kps.setter + def use_kps(self, value): + assert isinstance( + value, bool + ), "The value to set `use_kps` must be type of bool." + self._model.use_kps = value + + @max_nms.setter + def max_nms(self, value): + assert isinstance(value, int), "The value to set `max_nms` must be type of int." + self._model.max_nms = value + + @num_anchors.setter + def num_anchors(self, value): + assert isinstance( + value, int + ), "The value to set `num_anchors` must be type of int." + self._model.num_anchors = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/ultraface.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/ultraface.py new file mode 100755 index 0000000000..48c4f9b034 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/ultraface.py @@ -0,0 +1,75 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class UltraFace(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a UltraFace model exported by UltraFace. + + :param model_file: (str)Path of model file, e.g ./ultraface.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(UltraFace, self).__init__(runtime_option) + + self._model = C.vision.facedet.UltraFace( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "UltraFace initialize failed." 
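For the older contrib detectors (RetinaFace, SCRFD, UltraFace), confidence and NMS thresholds are per-call arguments of predict() instead of postprocessor attributes. A minimal UltraFace sketch with a hypothetical model file:

```python
import cv2
from ultrainfer.vision.facedet import UltraFace

model = UltraFace("version-RFB-320.onnx")  # hypothetical ONNX export
model.size = [320, 240]                    # preprocess size, (width, height)

im = cv2.imread("face.jpg")
result = model.predict(im, conf_threshold=0.7, nms_iou_threshold=0.3)
```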
+
+    def predict(self, input_image, conf_threshold=0.7, nms_iou_threshold=0.3):
+        """Detect the location and key points of human faces from an input image
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :param conf_threshold: confidence threshold for postprocessing, default is 0.7
+        :param nms_iou_threshold: IoU threshold for NMS, default is 0.3
+        :return: FaceDetectionResult
+        """
+        return self._model.predict(input_image, conf_threshold, nms_iou_threshold)
+
+    # Wrappers around UltraFace-specific attributes.
+    # Most of them control preprocessing; for example, setting model.size = [640, 480]
+    # changes the resize target used during preprocessing (if the model supports it).
+    @property
+    def size(self):
+        """
+        Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default (320, 240)
+        """
+        return self._model.size
+
+    @size.setter
+    def size(self, wh):
+        assert isinstance(
+            wh, (list, tuple)
+        ), "The value to set `size` must be type of tuple or list."
+        assert (
+            len(wh) == 2
+        ), "The value to set `size` must contain 2 elements [width, height], but now it contains {} elements.".format(
+            len(wh)
+        )
+        self._model.size = wh
diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov5face.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov5face.py
new file mode 100755
index 0000000000..903e7fba1f
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov5face.py
@@ -0,0 +1,147 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from .... import UltraInferModel, ModelFormat
+from .... import c_lib_wrap as C
+
+
+class YOLOv5Face(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load a face detection model exported by YOLOv5Face.
+
+        :param model_file: (str)Path of model file, e.g ./yolov5face.onnx
+        :param params_file: (str)Path of parameters file, e.g yolov5face/model.pdiparams, if the model_format is ModelFormat.ONNX, this param will be ignored, can be set as empty string
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend option;
+        # the resulting option is stored in self._runtime_option.
+        super(YOLOv5Face, self).__init__(runtime_option)
+
+        self._model = C.vision.facedet.YOLOv5Face(
+            model_file, params_file, self._runtime_option, model_format
+        )
+        # self.initialized reports whether the model was initialized successfully.
+        assert self.initialized, "YOLOv5Face initialize failed."
+ + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): + """Detect the location and key points of human faces from an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threashold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :return: FaceDetectionResult + """ + return self._model.predict(input_image, conf_threshold, nms_iou_threshold) + + # 一些跟YOLOv5Face模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640,640] + """ + return self._model.size + + @property + def padding_value(self): + # padding value, size should be the same as channels + return self._model.padding_value + + @property + def is_no_pad(self): + # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size + return self._model.is_no_pad + + @property + def is_mini_pad(self): + # only pad to the minimum rectange which height and width is times of stride + return self._model.is_mini_pad + + @property + def is_scale_up(self): + # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 + return self._model.is_scale_up + + @property + def stride(self): + # padding stride, for is_mini_pad + return self._model.stride + + @property + def landmarks_per_face(self): + """ + Argument for image postprocessing step, landmarks_per_face, default 5 in YOLOv5Face + """ + return self._model.landmarks_per_face + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @padding_value.setter + def padding_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_value` must be type of list." + self._model.padding_value = value + + @is_no_pad.setter + def is_no_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_no_pad` must be type of bool." + self._model.is_no_pad = value + + @is_mini_pad.setter + def is_mini_pad(self, value): + assert isinstance( + value, bool + ), "The value to set `is_mini_pad` must be type of bool." + self._model.is_mini_pad = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." + self._model.is_scale_up = value + + @stride.setter + def stride(self, value): + assert isinstance(value, int), "The value to set `stride` must be type of int." + self._model.stride = value + + @landmarks_per_face.setter + def landmarks_per_face(self, value): + assert isinstance( + value, int + ), "The value to set `landmarks_per_face` must be type of int." + self._model.landmarks_per_face = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov7face.py b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov7face.py new file mode 100755 index 0000000000..0b75cc1bd1 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/facedet/contrib/yolov7face.py @@ -0,0 +1,193 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
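# Illustrative usage sketch for the YOLOv5Face wrapper above; the model path, the image
# path, and the `ultrainfer.vision.facedet` import path are assumptions made for the
# example and not guaranteed by this patch.
import cv2
import ultrainfer as ui

model = ui.vision.facedet.YOLOv5Face("yolov5s-face.onnx")  # hypothetical ONNX export

# Tune the letterbox-style preprocessing through the properties shown above.
model.size = [640, 640]   # resize target as [width, height]
model.is_mini_pad = True  # pad only to the nearest multiple of `stride`
model.stride = 32

im = cv2.imread("face.jpg")  # HWC, BGR, as predict() expects
result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
print(result)  # FaceDetectionResult with boxes, scores and landmarks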
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class Yolov7FacePreprocessor: + def __init__(self): + """Create a preprocessor for Yolov7Face""" + self._preprocessor = C.vision.facedet.Yolov7Preprocessor() + + def run(self, input_ims): + """Preprocess input images for Yolov7Face + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] + """ + return self._preprocessor.size + + @property + def padding_color_value(self): + """ + padding value for preprocessing, default [114.0, 114.0, 114.0] + """ + # padding value, size should be the same as channels + return self._preprocessor.padding_color_value + + @property + def is_scale_up(self): + """ + is_scale_up for preprocessing, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0, default true + """ + return self._preprocessor.is_scale_up + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._preprocessor.size = wh + + @padding_color_value.setter + def padding_color_value(self, value): + assert isinstance( + value, list + ), "The value to set `padding_color_value` must be type of list." + self._preprocessor.padding_color_value = value + + @is_scale_up.setter + def is_scale_up(self, value): + assert isinstance( + value, bool + ), "The value to set `is_scale_up` must be type of bool." 
+ self._preprocessor.is_scale_up = value + + +class Yolov7FacePostprocessor: + def __init__(self): + """Create a postprocessor for Yolov7Face""" + self._postprocessor = C.vision.facedet.Yolov7FacePostprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for Yolov7Face + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: ims_info: (list of dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.45 + """ + return self._postprocessor.nms_threshold + + @property + def landmarks_per_face(self): + """ + landmarks per face for postprocessing, default is 5 + """ + return self._postprocessor.landmarks_per_face + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance( + conf_threshold, float + ), "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance( + nms_threshold, float + ), "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + @landmarks_per_face.setter + def landmarks_per_face(self, landmarks_per_face): + assert isinstance( + landmarks_per_face, int + ), "The value to set `landmarks_per_face` must be type of int." + self._postprocessor.landmarks_per_face = landmarks_per_face + + +class YOLOv7Face(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a YOLOv7Face model exported by YOLOv7Face. + + :param model_file: (str)Path of model file, e.g ./yolov7face.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(YOLOv7Face, self).__init__(runtime_option) + + self._model = C.vision.facedet.YOLOv7Face( + model_file, params_file, self._runtime_option, model_format + ) + + assert self.initialized, "YOLOv7Face initialize failed." 
+
+    def predict(self, input_image):
+        """Detect the location and key points of human faces from an input image
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: FaceDetectionResult
+        """
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return list of FaceDetectionResult
+        """
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get the preprocessor object of the loaded YOLOv7Face model
+
+        :return Yolov7FacePreprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get the postprocessor object of the loaded YOLOv7Face model
+
+        :return Yolov7FacePostprocessor
+        """
+        return self._model.postprocessor
diff --git a/libs/ultrainfer/python/ultrainfer/vision/faceid/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/faceid/__init__.py
new file mode 100755
index 0000000000..f5bde6daed
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/faceid/__init__.py
@@ -0,0 +1,16 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from .contrib import *
diff --git a/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/__init__.py
new file mode 100755
index 0000000000..d18dd52211
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/__init__.py
@@ -0,0 +1,17 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from .insightface import *
+from .adaface import *
diff --git a/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/adaface/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/adaface/__init__.py
new file mode 100755
index 0000000000..985f1111b8
--- /dev/null
+++ b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/adaface/__init__.py
@@ -0,0 +1,109 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
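# Illustrative usage sketch for the YOLOv7Face wrapper above. Unlike YOLOv5Face, the
# thresholds are configured on the postprocessor rather than passed to predict(); this
# assumes the bound postprocessor exposes the same `conf_threshold`/`nms_threshold`
# attributes as the Yolov7FacePostprocessor wrapper defined above. File names and the
# import path are assumptions made for the example.
import cv2
import ultrainfer as ui

model = ui.vision.facedet.YOLOv7Face("yolov7-face.onnx")  # hypothetical ONNX export
model.postprocessor.conf_threshold = 0.5   # defaults per the docstrings above
model.postprocessor.nms_threshold = 0.45

im = cv2.imread("group_photo.jpg")              # HWC, BGR
single_result = model.predict(im)               # FaceDetectionResult for one image
batch_results = model.batch_predict([im, im])   # one result per input image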
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from ..... import UltraInferModel, ModelFormat
+from ..... import c_lib_wrap as C
+
+
+class AdaFacePreprocessor:
+    def __init__(self):
+        """Create a preprocessor for AdaFace Model"""
+        self._preprocessor = C.vision.faceid.AdaFacePreprocessor()
+
+    def run(self, input_ims):
+        """Preprocess input images for AdaFace Model
+
+        :param: input_ims: (list of numpy.ndarray)The input image
+        :return: list of FDTensor, include image, scale_factor, im_shape
+        """
+        return self._preprocessor.run(input_ims)
+
+
+class AdaFacePostprocessor:
+    def __init__(self):
+        """Create a postprocessor for AdaFace Model"""
+        self._postprocessor = C.vision.faceid.AdaFacePostprocessor()
+
+    def run(self, runtime_results):
+        """Postprocess the runtime results for AdaFace Model
+
+        :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime
+        :return: list of FaceRecognitionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size)
+        """
+        return self._postprocessor.run(runtime_results)
+
+    @property
+    def l2_normalize(self):
+        """
+        Whether to apply L2 normalization to the output face embedding in the postprocessing step
+        """
+        return self._postprocessor.l2_normalize
+
+
+class AdaFace(UltraInferModel):
+    def __init__(
+        self,
+        model_file,
+        params_file="",
+        runtime_option=None,
+        model_format=ModelFormat.ONNX,
+    ):
+        """Load an AdaFace model exported by PaddleClas.
+
+        :param model_file: (str)Path of model file, e.g adaface/model.pdmodel
+        :param params_file: (str)Path of parameters file, e.g adaface/model.pdiparams, if the model_format is ModelFormat.ONNX, this param will be ignored, can be set as empty string
+        :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU
+        :param model_format: (ultrainfer.ModelFormat)Model format of the loaded model
+        """
+        super(AdaFace, self).__init__(runtime_option)
+        self._model = C.vision.faceid.AdaFace(
+            model_file, params_file, self._runtime_option, model_format
+        )
+        assert self.initialized, "AdaFace model initialize failed."
+
+    def predict(self, im):
+        """Predict the face recognition result for an input image
+
+        :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: FaceRecognitionResult
+        """
+
+        assert im is not None, "The input image data is None."
+ return self._model.predict(im) + + def batch_predict(self, images): + """Detect a batch of input image list + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get AdaFacePreprocessor object of the loaded model + + :return AdaFacePreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get AdaFacePostprocessor object of the loaded model + + :return AdaFacePostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/insightface/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/insightface/__init__.py new file mode 100755 index 0000000000..0aab2c78b8 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/faceid/contrib/insightface/__init__.py @@ -0,0 +1,237 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from ..... import UltraInferModel, ModelFormat +from ..... import c_lib_wrap as C + + +class InsightFaceRecognitionPreprocessor: + def __init__(self): + """Create a preprocessor for InsightFaceRecognition Model""" + self._preprocessor = C.vision.faceid.InsightFaceRecognitionPreprocessor() + + def run(self, input_ims): + """Preprocess input images for InsightFaceRecognition Model + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor, include image, scale_factor, im_shape + """ + return self._preprocessor.run(input_ims) + + @property + def size(self): + """ + Argument for image preprocessing step, tuple of (width, height), + decide the target size after resize, default (112, 112) + """ + return self._preprocessor.size + + @property + def alpha(self): + """ + Argument for image preprocessing step, alpha values for normalization, + default alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + """ + return self._preprocessor.alpha + + @property + def beta(self): + """ + Argument for image preprocessing step, beta values for normalization, + default beta = {-1.f, -1.f, -1.f} + """ + return self._preprocessor.beta + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+ """ + self._preprocessor.disable_permute() + + +class InsightFaceRecognitionPostprocessor: + def __init__(self): + """Create a postprocessor for InsightFaceRecognition Model""" + self._postprocessor = C.vision.faceid.InsightFaceRecognitionPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for PaddleClas Model + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :return: list of FaceRecognitionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + @property + def l2_normalize(self): + """ + confidence threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.l2_normalize + + +class InsightFaceRecognitionBase(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a InsightFaceRecognitionBase model exported by PaddleClas. + + :param model_file: (str)Path of model file, e.g InsightFaceRecognitionBase/model.pdmodel + :param params_file: (str)Path of parameters file, e.g InsightFaceRecognitionBase/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(InsightFaceRecognitionBase, self).__init__(runtime_option) + self._model = C.vision.faceid.InsightFaceRecognitionBase( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "InsightFaceRecognitionBase model initialize failed." + + def predict(self, im): + """Detect an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: DetectionResult + """ + + assert im is not None, "The input image data is None." + return self._model.predict(im) + + def batch_predict(self, images): + """Detect a batch of input image list + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get InsightFaceRecognitionPreprocessor object of the loaded model + + :return InsightFaceRecognitionPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get InsightFaceRecognitionPostprocessor object of the loaded model + + :return InsightFaceRecognitionPostprocessor + """ + return self._model.postprocessor + + +class ArcFace(InsightFaceRecognitionBase): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a ArcFace model exported by PaddleClas. 
+ :param model_file: (str)Path of model file, e.g ArcFace/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ArcFace/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(InsightFaceRecognitionBase, self).__init__(runtime_option) + + self._model = C.vision.faceid.ArcFace( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "ArcFace model initialize failed." + + +class CosFace(InsightFaceRecognitionBase): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a CosFace model exported by PaddleClas. + :param model_file: (str)Path of model file, e.g CosFace/model.pdmodel + :param params_file: (str)Path of parameters file, e.g CosFace/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(InsightFaceRecognitionBase, self).__init__(runtime_option) + + self._model = C.vision.faceid.CosFace( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "CosFace model initialize failed." + + +class PartialFC(InsightFaceRecognitionBase): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a PartialFC model exported by PaddleClas. + :param model_file: (str)Path of model file, e.g PartialFC/model.pdmodel + :param params_file: (str)Path of parameters file, e.g PartialFC/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(InsightFaceRecognitionBase, self).__init__(runtime_option) + + self._model = C.vision.faceid.PartialFC( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "PartialFC model initialize failed." + + +class VPL(InsightFaceRecognitionBase): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a VPL model exported by PaddleClas. + :param model_file: (str)Path of model file, e.g VPL/model.pdmodel + :param params_file: (str)Path of parameters file, e.g VPL/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + + super(InsightFaceRecognitionBase, self).__init__(runtime_option) + + self._model = C.vision.faceid.VPL( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "VPL model initialize failed." 
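# Illustrative usage sketch for the InsightFace-style recognition wrappers above (ArcFace
# shown; CosFace, PartialFC and VPL are constructed the same way). The model path, the
# `ultrainfer.vision.faceid` import path, and the `embedding` field on the returned
# FaceRecognitionResult are assumptions made for the example.
import cv2
import numpy as np
import ultrainfer as ui

model = ui.vision.faceid.ArcFace("arcface_r50.onnx")  # hypothetical ONNX export

face_a = cv2.imread("face_a.jpg")  # aligned face crops, HWC, BGR
face_b = cv2.imread("face_b.jpg")

emb_a = np.asarray(model.predict(face_a).embedding)
emb_b = np.asarray(model.predict(face_b).embedding)

# Cosine similarity between the two embeddings; larger values mean the two faces are
# more likely to belong to the same person.
cos_sim = float(emb_a @ emb_b / (np.linalg.norm(emb_a) * np.linalg.norm(emb_b)))
print(cos_sim)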
diff --git a/libs/ultrainfer/python/ultrainfer/vision/generation/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/generation/__init__.py new file mode 100755 index 0000000000..6829f3fce7 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/generation/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .contrib.anemigan import AnimeGAN diff --git a/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/anemigan.py b/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/anemigan.py new file mode 100755 index 0000000000..d75a9b4929 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/generation/contrib/anemigan.py @@ -0,0 +1,103 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class AnimeGANPreprocessor: + def __init__(self, config_file): + """Create a preprocessor for AnimeGAN.""" + self._preprocessor = C.vision.generation.AnimeGANPreprocessor() + + def run(self, input_ims): + """Preprocess input images for AnimeGAN. 
+ + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class AnimeGANPostprocessor: + def __init__(self): + """Create a postprocessor for AnimeGAN.""" + self._postprocessor = C.vision.generation.AnimeGANPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for AnimeGAN + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :return: results: (list) Final results + """ + return self._postprocessor.run(runtime_results) + + +class AnimeGAN(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a AnimeGAN model. + + :param model_file: (str)Path of model file, e.g ./model.pdmodel + :param params_file: (str)Path of parameters file, e.g ./model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # call super constructor to initialize self._runtime_option + super(AnimeGAN, self).__init__(runtime_option) + + self._model = C.vision.generation.AnimeGAN( + model_file, params_file, self._runtime_option, model_format + ) + # assert self.initialized to confirm initialization successfully. + assert self.initialized, "AnimeGAN initialize failed." + + def predict(self, input_image): + """Predict the style transfer result for an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: style transfer result + """ + return self._model.predict(input_image) + + def batch_predict(self, input_images): + """Predict the style transfer result for multiple input images + + :param input_images: (list of numpy.ndarray)The list of input image data, each image is a 3-D array with layout HWC, BGR format + :return: a list of style transfer results + """ + return self._model.batch_predict(input_images) + + @property + def preprocessor(self): + """Get AnimeGANPreprocessor object of the loaded model + + :return AnimeGANPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get AnimeGANPostprocessor object of the loaded model + + :return AnimeGANPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/headpose/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/headpose/__init__.py new file mode 100755 index 0000000000..9205fcd814 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/headpose/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
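# Illustrative usage sketch for the AnimeGAN wrapper above. AnimeGAN defaults to
# ModelFormat.PADDLE, so both the model and params files are passed; the file paths and
# the `ultrainfer.vision.generation` import path are assumptions made for the example.
import cv2
import ultrainfer as ui

model = ui.vision.generation.AnimeGAN(
    "animegan/model.pdmodel",    # hypothetical exported Paddle model
    "animegan/model.pdiparams",
)

im = cv2.imread("photo.jpg")                   # HWC, BGR
styled = model.predict(im)                     # style-transfer result for one image
styled_batch = model.batch_predict([im, im])   # list of results, one per input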
+ +from __future__ import absolute_import +from .contrib.fsanet import FSANet diff --git a/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/fsanet.py b/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/fsanet.py new file mode 100755 index 0000000000..373b3d62bc --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/headpose/contrib/fsanet.py @@ -0,0 +1,76 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class FSANet(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a headpose model exported by FSANet. + + :param model_file: (str)Path of model file, e.g fsanet/fsanet-var.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + + super(FSANet, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.ONNX + ), "FSANet only support model format of ModelFormat.ONNX now." + self._model = C.vision.headpose.FSANet( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "FSANet initialize failed." 
+ + def predict(self, input_image): + """Predict an input image headpose + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: HeadPoseResult + """ + + return self._model.predict(input_image) + + @property + def size(self): + """ + Returns the preprocess image size, default (64, 64) + """ + return self._model.size + + @size.setter + def size(self, wh): + """ + Set the preprocess image size, default (64, 64) + """ + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh diff --git a/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/__init__.py new file mode 100755 index 0000000000..36159c84d1 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .pptinypose import PPTinyPose diff --git a/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/pptinypose/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/pptinypose/__init__.py new file mode 100755 index 0000000000..ef5b6a6ca4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/keypointdetection/pptinypose/__init__.py @@ -0,0 +1,90 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class PPTinyPose(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """load a PPTinyPose model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g pptinypose/model.pdmodel + :param params_file: (str)Path of parameters file, e.g pptinypose/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g pptinypose/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPTinyPose, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPTinyPose model only support model format of ModelFormat.Paddle now." + self._model = C.vision.keypointdetection.PPTinyPose( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PPTinyPose model initialize failed." + + def predict(self, input_image, detection_result=None): + """Detect keypoints in an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param detection_result: (DetectionResult)Pre-detected boxes result, default is None + :return: KeyPointDetectionResult + """ + assert input_image is not None, "The input image data is None." + if detection_result: + return self._model.predict(input_image, detection_result) + else: + return self._model.predict(input_image) + + @property + def use_dark(self): + """Atrribute of PPTinyPose model. Stating whether using Distribution-Aware Coordinate Representation for Human Pose Estimation(DARK for short) in postprocess, default is True + + :return: value of use_dark(bool) + """ + return self._model.use_dark + + @use_dark.setter + def use_dark(self, value): + """Set attribute use_dark of PPTinyPose model. + + :param value: (bool)The value to set use_dark + """ + assert isinstance( + value, bool + ), "The value to set `use_dark` must be type of bool." + self._model.use_dark = value + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._model.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._model.disable_permute() diff --git a/libs/ultrainfer/python/ultrainfer/vision/matting/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/matting/__init__.py new file mode 100755 index 0000000000..17961245b2 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/matting/__init__.py @@ -0,0 +1,18 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
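# Illustrative usage sketch for the PPTinyPose wrapper above. The export paths follow the
# examples in its docstring; the `ultrainfer.vision.keypointdetection` import path is an
# assumption made for the example.
import cv2
import ultrainfer as ui

model = ui.vision.keypointdetection.PPTinyPose(
    "pptinypose/model.pdmodel",     # hypothetical PaddleDetection export
    "pptinypose/model.pdiparams",
    "pptinypose/infer_cfg.yml",
)
model.use_dark = True  # keep DARK decoding enabled in postprocessing (the default)

im = cv2.imread("person.jpg")  # HWC, BGR

# Whole-image mode: estimate keypoints directly on the input image.
keypoints = model.predict(im)

# Top-down mode: pass a pre-computed DetectionResult so keypoints are estimated inside
# each detected person box, e.g. keypoints = model.predict(im, detection_result=det_res)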
+ +from __future__ import absolute_import +from .contrib.modnet import MODNet +from .contrib.rvm import RobustVideoMatting +from .ppmatting import PPMatting diff --git a/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/modnet.py b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/modnet.py new file mode 100755 index 0000000000..b719ea8734 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/modnet.py @@ -0,0 +1,125 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class MODNet(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a MODNet model exported by MODNet. + + :param model_file: (str)Path of model file, e.g ./modnet.onnx + :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + # 调用基函数进行backend_option的初始化 + # 初始化后的option保存在self._runtime_option + super(MODNet, self).__init__(runtime_option) + + self._model = C.vision.matting.MODNet( + model_file, params_file, self._runtime_option, model_format + ) + # 通过self.initialized判断整个模型的初始化是否成功 + assert self.initialized, "MODNet initialize failed." 
+ + def predict(self, input_image): + """Predict the matting result for an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: MattingResult + """ + return self._model.predict(input_image) + + # 一些跟模型有关的属性封装 + # 多数是预处理相关,可通过修改如model.size = [256, 256]改变预处理时resize的大小(前提是模型支持) + @property + def size(self): + """ + Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [256,256] + """ + return self._model.size + + @property + def alpha(self): + """ + Argument for image preprocessing step, alpha value for normalization, default alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f} + """ + return self._model.alpha + + @property + def beta(self): + """ + Argument for image preprocessing step, beta value for normalization, default beta = {-1.f, -1.f, -1.f} + """ + return self._model.beta + + @property + def swap_rb(self): + """ + Argument for image preprocessing step, whether to swap the B and R channel, such as BGR->RGB, default True. + """ + return self._model.swap_rb + + @size.setter + def size(self, wh): + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @alpha.setter + def alpha(self, value): + assert isinstance( + value, (list, tuple) + ), "The value to set `alpha` must be type of tuple or list." + assert ( + len(value) == 3 + ), "The value to set `alpha` must contatins 3 elements for each channels, but now it contains {} elements.".format( + len(value) + ) + self._model.alpha = value + + @beta.setter + def beta(self, value): + assert isinstance( + value, (list, tuple) + ), "The value to set `beta` must be type of tuple or list." + assert ( + len(value) == 3 + ), "The value to set `beta` must contatins 3 elements for each channels, but now it contains {} elements.".format( + len(value) + ) + self._model.beta = value + + @swap_rb.setter + def swap_rb(self, value): + assert isinstance( + value, bool + ), "The value to set `swap_rb` must be type of bool." + self._model.swap_rb = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/rvm.py b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/rvm.py new file mode 100755 index 0000000000..f00793bef4 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/matting/contrib/rvm.py @@ -0,0 +1,105 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class RobustVideoMatting(UltraInferModel): + def __init__( + self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX, + ): + """Load a video matting model exported by RobustVideoMatting. 
+ + :param model_file: (str)Path of model file, e.g rvm/rvm_mobilenetv3_fp32.onnx + :param params_file: (str)Path of parameters file, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model, default is ONNX + """ + super(RobustVideoMatting, self).__init__(runtime_option) + + self._model = C.vision.matting.RobustVideoMatting( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "RobustVideoMatting initialize failed." + + def predict(self, input_image): + """Matting an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: MattingResult + """ + return self._model.predict(input_image) + + @property + def size(self): + """ + Returns the preprocess image size + """ + return self._model.size + + @property + def video_mode(self): + """ + Whether to open the video mode, if there are some irrelevant pictures, set it to fasle, the default is true + """ + return self._model.video_mode + + @property + def swap_rb(self): + """ + Whether convert to RGB, Set to false if you have converted YUV format images to RGB outside the model, dafault true + """ + return self._model.swap_rb + + @size.setter + def size(self, wh): + """ + Set the preprocess image size + """ + assert isinstance( + wh, (list, tuple) + ), "The value to set `size` must be type of tuple or list." + assert ( + len(wh) == 2 + ), "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( + len(wh) + ) + self._model.size = wh + + @video_mode.setter + def video_mode(self, value): + """ + Set video_mode property, the default is true + """ + assert isinstance( + value, bool + ), "The value to set `video_mode` must be type of bool." + self._model.video_mode = value + + @swap_rb.setter + def swap_rb(self, value): + """ + Set swap_rb property, the default is true + """ + assert isinstance( + value, bool + ), "The value to set `swap_rb` must be type of bool." + self._model.swap_rb = value diff --git a/libs/ultrainfer/python/ultrainfer/vision/matting/ppmatting/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/matting/ppmatting/__init__.py new file mode 100755 index 0000000000..536d3b331e --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/matting/ppmatting/__init__.py @@ -0,0 +1,55 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class PPMatting(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PPMatting model exported by PaddleSeg. + + :param model_file: (str)Path of model file, e.g PPMatting-512/model.pdmodel + :param params_file: (str)Path of parameters file, e.g PPMatting-512/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g PPMatting-512/deploy.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPMatting, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPMatting model only support model format of ModelFormat.Paddle now." + self._model = C.vision.matting.PPMatting( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PPMatting model initialize failed." + + def predict(self, input_image): + """Predict the matting result for an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: MattingResult + """ + assert input_image is not None, "The input image data is None." + return self._model.predict(input_image) diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/__init__.py new file mode 100755 index 0000000000..e41e77900a --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .ppocr import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/__init__.py new file mode 100755 index 0000000000..2582f92f34 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/__init__.py @@ -0,0 +1,1928 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
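# Illustrative usage sketch for the PPMatting wrapper above, reusing the example paths
# from its docstring; the `ultrainfer.vision.matting` import path is an assumption made
# for the example.
import cv2
import ultrainfer as ui

model = ui.vision.matting.PPMatting(
    "PPMatting-512/model.pdmodel",
    "PPMatting-512/model.pdiparams",
    "PPMatting-512/deploy.yml",
)

im = cv2.imread("portrait.jpg")      # HWC, BGR
matting_result = model.predict(im)   # MattingResult with the predicted alpha matte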
+ +from __future__ import absolute_import +import logging +import math +import os +import re +import tempfile + +from dataclasses import dataclass +from tokenizers import Tokenizer as TokenizerFast + +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C +from ...common import ProcessorManager +from ....py_only import PyOnlyProcessorChain +from ....py_only.vision import PyOnlyVisionModel, processors as P +from ....utils.misc import load_config +from .utils.ser_vi_layoutxlm.vqa_utils import * +from .utils.ser_vi_layoutxlm.transforms import * +from .utils.ser_vi_layoutxlm.operators import * + + +def sort_boxes(boxes): + return C.vision.ocr.sort_boxes(boxes) + + +class UVDocPreprocessor(ProcessorManager): + def __init__(self): + """Create a preprocessor for UVDoc Model""" + super(UVDocPreprocessor, self).__init__() + self._manager = C.vision.ocr.UVDocPreprocessor() + + def set_normalize(self, mean, std, is_scale): + """Set preprocess normalize parameters, please call this API to + customize the normalize parameters, otherwise it will use the default + normalize parameters. + :param: mean: (list of float) mean values + :param: std: (list of float) std values + :param: is_scale: (boolean) whether to scale + """ + self._manager.set_normalize(mean, std, is_scale) + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._manager.disable_permute() + + +class UVDocPostprocessor: + def __init__(self): + """Create a postprocessor for UVDoc Model""" + self._postprocessor = C.vision.ocr.UVDocPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for UVDoc Model + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class UVDocWarpper(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load OCR recognition model provided by PaddleOCR + + :param model_file: (str)Path of model file, e.g ./ch_PP-OCRv3_rec_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_PP-OCRv3_rec_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(UVDocWarpper, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.UVDocWarpper() + self._runnable = False + else: + self._model = C.vision.ocr.UVDocWarpper( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "UVDocWarpper initialize failed." 
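+            # Illustrative usage sketch (paths are placeholders): UVDocWarpper
+            # rectifies a distorted document photo from a BGR numpy array.
+            #
+            #     model = UVDocWarpper("uvdoc_infer/inference.pdmodel",
+            #                          "uvdoc_infer/inference.pdiparams")
+            #     unwarped = model.predict(cv2.imread("warped_page.jpg"))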
+ self._runnable = True + + def clone(self): + """Clone OCR recognition model object + :return: a new OCR recognition model object + """ + + class UVDocWarpperClone(UVDocWarpper): + def __init__(self, model): + self._model = model + + clone_model = UVDocWarpperClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: rec_text, rec_score + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of rec_text, list of rec_score + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @preprocessor.setter + def preprocessor(self, value): + self._model.preprocessor = value + + @property + def postprocessor(self): + return self._model.postprocessor + + @postprocessor.setter + def postprocessor(self, value): + self._model.postprocessor = value + + +class DBDetectorPreprocessor(ProcessorManager): + def __init__(self): + """ + Create a preprocessor for DBDetectorModel + """ + super(DBDetectorPreprocessor, self).__init__() + self._manager = C.vision.ocr.DBDetectorPreprocessor() + + @property + def max_side_len(self): + """Get max_side_len value.""" + return self._manager.max_side_len + + @max_side_len.setter + def max_side_len(self, value): + """Set max_side_len value. + :param: value: (int) max_side_len value + """ + assert isinstance( + value, int + ), "The value to set `max_side_len` must be type of int." + self._manager.max_side_len = value + + def set_normalize(self, mean, std, is_scale): + """Set preprocess normalize parameters, please call this API to + customize the normalize parameters, otherwise it will use the default + normalize parameters. + :param: mean: (list of float) mean values + :param: std: (list of float) std values + :param: is_scale: (boolean) whether to scale + """ + self._manager.set_normalize(mean, std, is_scale) + + @property + def static_shape_infer(self): + return self._manager.static_shape_infer + + @static_shape_infer.setter + def static_shape_infer(self, value): + assert isinstance( + value, bool + ), "The value to set `static_shape_infer` must be type of bool." + self._manager.static_shape_infer = value + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+ """ + self._manager.disable_permute() + + +class DBDetectorPostprocessor: + def __init__(self): + """ + Create a postprocessor for DBDetectorModel + """ + self._postprocessor = C.vision.ocr.DBDetectorPostprocessor() + + def run(self, runtime_results, batch_det_img_info): + """Postprocess the runtime results for DBDetectorModel + + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :param: batch_det_img_info: (list of std::array)The output of det_preprocessor + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, batch_det_img_info) + + @property + def det_db_thresh(self): + """ + Return the det_db_thresh of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_thresh + + @det_db_thresh.setter + def det_db_thresh(self, value): + """Set the det_db_thresh for DBDetectorPostprocessor + + :param: value : the det_db_thresh value + """ + assert isinstance( + value, float + ), "The value to set `det_db_thresh` must be type of float." + self._postprocessor.det_db_thresh = value + + @property + def det_db_box_thresh(self): + """ + Return the det_db_box_thresh of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_box_thresh + + @det_db_box_thresh.setter + def det_db_box_thresh(self, value): + """Set the det_db_box_thresh for DBDetectorPostprocessor + + :param: value : the det_db_box_thresh value + """ + assert isinstance( + value, float + ), "The value to set `det_db_box_thresh` must be type of float." + self._postprocessor.det_db_box_thresh = value + + @property + def det_db_unclip_ratio(self): + """ + Return the det_db_unclip_ratio of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_unclip_ratio + + @det_db_unclip_ratio.setter + def det_db_unclip_ratio(self, value): + """Set the det_db_unclip_ratio for DBDetectorPostprocessor + + :param: value : the det_db_unclip_ratio value + """ + assert isinstance( + value, float + ), "The value to set `det_db_unclip_ratio` must be type of float." + self._postprocessor.det_db_unclip_ratio = value + + @property + def det_db_score_mode(self): + """ + Return the det_db_score_mode of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_score_mode + + @property + def det_db_box_type(self): + """ + Return the det_db_score_mode of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_box_type + + @det_db_box_type.setter + def det_db_box_type(self, value): + """Set the det_db_score_mode for DBDetectorPostprocessor + + :param: value : the det_db_score_mode value + """ + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._postprocessor.det_db_box_type = value + + @det_db_score_mode.setter + def det_db_score_mode(self, value): + """Set the det_db_score_mode for DBDetectorPostprocessor + + :param: value : the det_db_score_mode value + """ + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." 
+ self._postprocessor.det_db_score_mode = value + + @property + def use_dilation(self): + """ + Return the use_dilation of DBDetectorPostprocessor + """ + return self._postprocessor.use_dilation + + @use_dilation.setter + def use_dilation(self, value): + """Set the use_dilation for DBDetectorPostprocessor + + :param: value : the use_dilation value + """ + assert isinstance( + value, bool + ), "The value to set `use_dilation` must be type of bool." + self._postprocessor.use_dilation = value + + +class DBCURVEDetectorPostprocessor: + def __init__(self): + """ + Create a postprocessor for DBDetectorModel + """ + self._postprocessor = C.vision.ocr.DBCURVEDetectorPostprocessor() + + def run(self, runtime_results, batch_det_img_info): + """Postprocess the runtime results for DBDetectorModel + + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :param: batch_det_img_info: (list of std::array)The output of det_preprocessor + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, batch_det_img_info) + + @property + def det_db_thresh(self): + """ + Return the det_db_thresh of DBCURVEDetectorPostprocessor + """ + return self._postprocessor.det_db_thresh + + @det_db_thresh.setter + def det_db_thresh(self, value): + """Set the det_db_thresh for DBCURVEDetectorPostprocessor + + :param: value : the det_db_thresh value + """ + assert isinstance( + value, float + ), "The value to set `det_db_thresh` must be type of float." + self._postprocessor.det_db_thresh = value + + @property + def det_db_box_thresh(self): + """ + Return the det_db_box_thresh of DBCURVEDetectorPostprocessor + """ + return self._postprocessor.det_db_box_thresh + + @det_db_box_thresh.setter + def det_db_box_thresh(self, value): + """Set the det_db_box_thresh for DBCURVEDetectorPostprocessor + + :param: value : the det_db_box_thresh value + """ + assert isinstance( + value, float + ), "The value to set `det_db_box_thresh` must be type of float." + self._postprocessor.det_db_box_thresh = value + + @property + def det_db_unclip_ratio(self): + """ + Return the det_db_unclip_ratio of DBCURVEDetectorPostprocessor + """ + return self._postprocessor.det_db_unclip_ratio + + @det_db_unclip_ratio.setter + def det_db_unclip_ratio(self, value): + """Set the det_db_unclip_ratio for DBCURVEDetectorPostprocessor + + :param: value : the det_db_unclip_ratio value + """ + assert isinstance( + value, float + ), "The value to set `det_db_unclip_ratio` must be type of float." + self._postprocessor.det_db_unclip_ratio = value + + @property + def det_db_score_mode(self): + """ + Return the det_db_score_mode of DBCURVEDetectorPostprocessor + """ + return self._postprocessor.det_db_score_mode + + @property + def det_db_box_type(self): + """ + Return the det_db_score_mode of DBDetectorPostprocessor + """ + return self._postprocessor.det_db_box_type + + @det_db_box_type.setter + def det_db_box_type(self, value): + """Set the det_db_score_mode for DBDetectorPostprocessor + + :param: value : the det_db_score_mode value + """ + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." 
+ self._postprocessor.det_db_box_type = value + + @det_db_score_mode.setter + def det_db_score_mode(self, value): + """Set the det_db_score_mode for DBDetectorPostprocessor + + :param: value : the det_db_score_mode value + """ + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._postprocessor.det_db_score_mode = value + + @property + def use_dilation(self): + """ + Return the use_dilation of DBDetectorPostprocessor + """ + return self._postprocessor.use_dilation + + @use_dilation.setter + def use_dilation(self, value): + """Set the use_dilation for DBDetectorPostprocessor + + :param: value : the use_dilation value + """ + assert isinstance( + value, bool + ), "The value to set `use_dilation` must be type of bool." + self._postprocessor.use_dilation = value + + +class DBDetector(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load OCR detection model provided by PaddleOCR. + + :param model_file: (str)Path of model file, e.g ./ch_PP-OCRv3_det_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_PP-OCRv3_det_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(DBDetector, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.DBDetector() + self._runnable = False + else: + self._model = C.vision.ocr.DBDetector( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "DBDetector initialize failed." + self._runnable = True + + def clone(self): + """Clone OCR detection model object + + :return: a new OCR detection model object + """ + + class DBDetectorClone(DBDetector): + def __init__(self, model): + self._model = model + + clone_model = DBDetectorClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: boxes + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: batch_boxes + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @property + def postprocessor(self): + return self._model.postprocessor + + # Det Preprocessor Property + @property + def max_side_len(self): + return self._model.preprocessor.max_side_len + + @max_side_len.setter + def max_side_len(self, value): + assert isinstance( + value, int + ), "The value to set `max_side_len` must be type of int." + self._model.preprocessor.max_side_len = value + + # Det Ppstprocessor Property + @property + def det_db_thresh(self): + return self._model.postprocessor.det_db_thresh + + @det_db_thresh.setter + def det_db_thresh(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_thresh` must be type of float." 
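+        # Illustrative tuning sketch: these properties mirror PaddleOCR's DB
+        # postprocessing parameters; commonly used values are det_db_thresh=0.3,
+        # det_db_box_thresh=0.6 and det_db_unclip_ratio=1.5. Paths below are
+        # placeholders.
+        #
+        #     det = DBDetector("det_infer/inference.pdmodel",
+        #                      "det_infer/inference.pdiparams")
+        #     det.det_db_box_thresh = 0.6
+        #     det.det_db_unclip_ratio = 1.5
+        #     boxes = det.predict(cv2.imread("page.jpg"))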
+ self._model.postprocessor.det_db_thresh = value + + @property + def det_db_box_thresh(self): + return self._model.postprocessor.det_db_box_thresh + + @det_db_box_thresh.setter + def det_db_box_thresh(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_box_thresh` must be type of float." + self._model.postprocessor.det_db_box_thresh = value + + @property + def det_db_unclip_ratio(self): + return self._model.postprocessor.det_db_unclip_ratio + + @det_db_unclip_ratio.setter + def det_db_unclip_ratio(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_unclip_ratio` must be type of float." + self._model.postprocessor.det_db_unclip_ratio = value + + @property + def det_db_box_type(self): + return self._model.postprocessor.det_db_box_type + + @det_db_box_type.setter + def det_db_box_type(self, value): + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._model.postprocessor.det_db_box_type = value + + @property + def det_db_score_mode(self): + return self._model.postprocessor.det_db_score_mode + + @det_db_score_mode.setter + def det_db_score_mode(self, value): + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._model.postprocessor.det_db_score_mode = value + + @property + def use_dilation(self): + return self._model.postprocessor.use_dilation + + @use_dilation.setter + def use_dilation(self, value): + assert isinstance( + value, bool + ), "The value to set `use_dilation` must be type of bool." + self._model.postprocessor.use_dilation = value + + +class DBCURVEDetector(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load OCR detection model provided by PaddleOCR. + + :param model_file: (str)Path of model file, e.g ./ch_PP-OCRv3_det_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_PP-OCRv3_det_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(DBCURVEDetector, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.DBCURVEDetector() + self._runnable = False + else: + self._model = C.vision.ocr.DBCURVEDetector( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "DBCURVEDetector initialize failed." 
+ self._runnable = True + + def clone(self): + """Clone OCR detection model object + + :return: a new OCR detection model object + """ + + class DBCURVEDetectorClone(DBCURVEDetector): + def __init__(self, model): + self._model = model + + clone_model = DBCURVEDetectorClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: boxes + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: batch_boxes + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @property + def postprocessor(self): + return self._model.postprocessor + + # Det Preprocessor Property + @property + def max_side_len(self): + return self._model.preprocessor.max_side_len + + @max_side_len.setter + def max_side_len(self, value): + assert isinstance( + value, int + ), "The value to set `max_side_len` must be type of int." + self._model.preprocessor.max_side_len = value + + # Det Ppstprocessor Property + @property + def det_db_thresh(self): + return self._model.postprocessor.det_db_thresh + + @det_db_thresh.setter + def det_db_thresh(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_thresh` must be type of float." + self._model.postprocessor.det_db_thresh = value + + @property + def det_db_box_thresh(self): + return self._model.postprocessor.det_db_box_thresh + + @det_db_box_thresh.setter + def det_db_box_thresh(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_box_thresh` must be type of float." + self._model.postprocessor.det_db_box_thresh = value + + @property + def det_db_unclip_ratio(self): + return self._model.postprocessor.det_db_unclip_ratio + + @det_db_unclip_ratio.setter + def det_db_unclip_ratio(self, value): + assert isinstance( + value, float + ), "The value to set `det_db_unclip_ratio` must be type of float." + self._model.postprocessor.det_db_unclip_ratio = value + + @property + def det_db_box_type(self): + return self._model.postprocessor.det_db_box_type + + @det_db_box_type.setter + def det_db_box_type(self, value): + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._model.postprocessor.det_db_box_type = value + + @property + def det_db_score_mode(self): + return self._model.postprocessor.det_db_score_mode + + @det_db_score_mode.setter + def det_db_score_mode(self, value): + assert isinstance( + value, str + ), "The value to set `det_db_score_mode` must be type of str." + self._model.postprocessor.det_db_score_mode = value + + @property + def use_dilation(self): + return self._model.postprocessor.use_dilation + + @use_dilation.setter + def use_dilation(self, value): + assert isinstance( + value, bool + ), "The value to set `use_dilation` must be type of bool." 
+ self._model.postprocessor.use_dilation = value + + +class ClassifierPreprocessor(ProcessorManager): + def __init__(self): + """Create a preprocessor for ClassifierModel""" + super(ClassifierPreprocessor, self).__init__() + self._manager = C.vision.ocr.ClassifierPreprocessor() + + def set_normalize(self, mean, std, is_scale): + """Set preprocess normalize parameters, please call this API to + customize the normalize parameters, otherwise it will use the default + normalize parameters. + :param: mean: (list of float) mean values + :param: std: (list of float) std values + :param: is_scale: (boolean) whether to scale + """ + self._manager.set_normalize(mean, std, is_scale) + + @property + def cls_image_shape(self): + return self._manager.cls_image_shape + + @cls_image_shape.setter + def cls_image_shape(self, value): + assert isinstance( + value, list + ), "The value to set `cls_image_shape` must be type of list." + self._manager.cls_image_shape = value + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._manager.disable_permute() + + +class ClassifierPostprocessor: + def __init__(self): + """Create a postprocessor for ClassifierModel""" + self._postprocessor = C.vision.ocr.ClassifierPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for ClassifierModel + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + @property + def cls_thresh(self): + """ + Return the cls_thresh of ClassifierPostprocessor + """ + return self._postprocessor.cls_thresh + + @cls_thresh.setter + def cls_thresh(self, value): + """Set the cls_thresh for ClassifierPostprocessor + + :param: value: the value of cls_thresh + """ + assert isinstance( + value, float + ), "The value to set `cls_thresh` must be type of float." + self._postprocessor.cls_thresh = value + + +class Classifier(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load OCR classification model provided by PaddleOCR. + + :param model_file: (str)Path of model file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(Classifier, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.Classifier() + self._runnable = False + else: + self._model = C.vision.ocr.Classifier( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "Classifier initialize failed." 
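+            # Illustrative usage sketch (paths are placeholders): the classifier
+            # decides whether a cropped text line is rotated by 180 degrees and is
+            # usually fed the crops cut out of DBDetector boxes (BGR numpy arrays).
+            #
+            #     cls = Classifier("cls_infer/inference.pdmodel",
+            #                      "cls_infer/inference.pdiparams")
+            #     cls_label, cls_score = cls.predict(cropped_text_image)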
+ self._runnable = True + + def clone(self): + """Clone OCR classification model object + :return: a new OCR classification model object + """ + + class ClassifierClone(Classifier): + def __init__(self, model): + self._model = model + + clone_model = ClassifierClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: cls_label, cls_score + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of cls_label, list of cls_score + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @preprocessor.setter + def preprocessor(self, value): + self._model.preprocessor = value + + @property + def postprocessor(self): + return self._model.postprocessor + + @postprocessor.setter + def postprocessor(self, value): + self._model.postprocessor = value + + @property + def cls_image_shape(self): + return self._model.preprocessor.cls_image_shape + + @cls_image_shape.setter + def cls_image_shape(self, value): + assert isinstance( + value, list + ), "The value to set `cls_image_shape` must be type of list." + self._model.preprocessor.cls_image_shape = value + + # Cls Postprocessor Property + @property + def cls_thresh(self): + return self._model.postprocessor.cls_thresh + + @cls_thresh.setter + def cls_thresh(self, value): + assert isinstance( + value, float + ), "The value to set `cls_thresh` must be type of float." + self._model.postprocessor.cls_thresh = value + + +class RecognizerPreprocessor(ProcessorManager): + def __init__(self): + """Create a preprocessor for RecognizerModel""" + super(RecognizerPreprocessor, self).__init__() + self._manager = C.vision.ocr.RecognizerPreprocessor() + + @property + def static_shape_infer(self): + return self._manager.static_shape_infer + + @static_shape_infer.setter + def static_shape_infer(self, value): + assert isinstance( + value, bool + ), "The value to set `static_shape_infer` must be type of bool." + self._manager.static_shape_infer = value + + def set_normalize(self, mean, std, is_scale): + """Set preprocess normalize parameters, please call this API to + customize the normalize parameters, otherwise it will use the default + normalize parameters. + :param: mean: (list of float) mean values + :param: std: (list of float) std values + :param: is_scale: (boolean) whether to scale + """ + self._manager.set_normalize(mean, std, is_scale) + + @property + def rec_image_shape(self): + return self._manager.rec_image_shape + + @rec_image_shape.setter + def rec_image_shape(self, value): + assert isinstance( + value, list + ), "The value to set `rec_image_shape` must be type of list." + self._manager.rec_image_shape = value + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+ """ + self._manager.disable_permute() + + +class RecognizerPostprocessor: + def __init__(self, label_path): + """Create a postprocessor for RecognizerModel + :param label_path: (str)Path of label file + """ + self._postprocessor = C.vision.ocr.RecognizerPostprocessor(label_path) + + def run(self, runtime_results): + """Postprocess the runtime results for RecognizerModel + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class Recognizer(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + label_path="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load OCR recognition model provided by PaddleOCR + + :param model_file: (str)Path of model file, e.g ./ch_PP-OCRv3_rec_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_PP-OCRv3_rec_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param label_path: (str)Path of label file used by OCR recognition model. e.g ./ppocr_keys_v1.txt + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(Recognizer, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.Recognizer() + self._runnable = False + else: + self._model = C.vision.ocr.Recognizer( + model_file, params_file, label_path, self._runtime_option, model_format + ) + assert self.initialized, "Recognizer initialize failed." + self._runnable = True + + def clone(self): + """Clone OCR recognition model object + :return: a new OCR recognition model object + """ + + class RecognizerClone(Recognizer): + def __init__(self, model): + self._model = model + + clone_model = RecognizerClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: rec_text, rec_score + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of rec_text, list of rec_score + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @preprocessor.setter + def preprocessor(self, value): + self._model.preprocessor = value + + @property + def postprocessor(self): + return self._model.postprocessor + + @postprocessor.setter + def postprocessor(self, value): + self._model.postprocessor = value + + @property + def static_shape_infer(self): + return self._model.preprocessor.static_shape_infer + + @static_shape_infer.setter + def static_shape_infer(self, value): + assert isinstance( + value, bool + ), "The value to set `static_shape_infer` must be type of bool." 
+ self._model.preprocessor.static_shape_infer = value + + @property + def rec_image_shape(self): + return self._model.preprocessor.rec_image_shape + + @rec_image_shape.setter + def rec_image_shape(self, value): + assert isinstance( + value, list + ), "The value to set `rec_image_shape` must be type of list." + self._model.preprocessor.rec_image_shape = value + + +class StructureV2TablePreprocessor: + def __init__(self): + """Create a preprocessor for StructureV2Table Model""" + self._preprocessor = C.vision.ocr.StructureV2TablePreprocessor() + + def run(self, input_ims): + """Preprocess input images for StructureV2TableModel + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class StructureV2TablePostprocessor: + def __init__(self, dict_path): + """Create a postprocessor for StructureV2Table Model""" + self._postprocessor = C.vision.ocr.StructureV2TablePostprocessor(dict_path) + + def run(self, runtime_results): + """Postprocess the runtime results for StructureV2Table Model + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class StructureV2Table(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + table_char_dict_path="", + box_shape="ori", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load StructureV2Table model provided by PP-StructureV2. + + :param model_file: (str)Path of model file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param table_char_dict_path: (str)Path of table_char_dict file, e.g ../ppocr/utils/dict/table_structure_dict_ch.txt + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(StructureV2Table, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.StructureV2Table() + self._runnable = False + else: + self._model = C.vision.ocr.StructureV2Table( + model_file, + params_file, + table_char_dict_path, + box_shape, + self._runtime_option, + model_format, + ) + assert self.initialized, "Classifier initialize failed." 
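+            # Illustrative usage sketch (paths are placeholders): StructureV2Table
+            # predicts table cell boxes and the HTML structure tokens of a table image.
+            #
+            #     table = StructureV2Table(
+            #         "table_infer/inference.pdmodel",
+            #         "table_infer/inference.pdiparams",
+            #         table_char_dict_path="table_structure_dict_ch.txt")
+            #     bbox, structure = table.predict(cv2.imread("table.jpg"))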
+ self._runnable = True + + def clone(self): + """Clone StructureV2Table model object + :return: a new StructureV2Table model object + """ + + class StructureV2TableClone(StructureV2Table): + def __init__(self, model): + self._model = model + + clone_model = StructureV2TableClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: bbox, structure + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of bbox list, list of structure + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @preprocessor.setter + def preprocessor(self, value): + self._model.preprocessor = value + + @property + def postprocessor(self): + return self._model.postprocessor + + @postprocessor.setter + def postprocessor(self, value): + self._model.postprocessor = value + + +class StructureV2LayoutPreprocessor: + def __init__(self): + """Create a preprocessor for StructureV2Layout Model""" + self._preprocessor = C.vision.ocr.StructureV2LayoutPreprocessor() + + def run(self, input_ims): + """Preprocess input images for StructureV2Layout Model + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class StructureV2LayoutPostprocessor: + def __init__(self): + """Create a postprocessor for StructureV2Layout Model""" + self._postprocessor = C.vision.ocr.StructureV2LayoutPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for StructureV2Layout Model + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class StructureV2Layout(UltraInferModel): + def __init__( + self, + model_file="", + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load StructureV2Layout model provided by PP-StructureV2. + + :param model_file: (str)Path of model file, e.g ./picodet_lcnet_x1_0_fgd_layout_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./picodet_lcnet_x1_0_fgd_layout_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(StructureV2Layout, self).__init__(runtime_option) + + if len(model_file) == 0: + self._model = C.vision.ocr.StructureV2Layout() + self._runnable = False + else: + self._model = C.vision.ocr.StructureV2Layout( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "StructureV2Layout model initialize failed." 
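+            # Illustrative usage sketch (paths are placeholders): StructureV2Layout
+            # detects layout regions such as text blocks, tables and figures on a page.
+            #
+            #     layout = StructureV2Layout("layout_infer/inference.pdmodel",
+            #                                "layout_infer/inference.pdiparams")
+            #     bboxes = layout.predict(cv2.imread("page.jpg"))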
+ self._runnable = True + + def clone(self): + """Clone StructureV2Layout model object + :return: a new StructureV2Table model object + """ + + class StructureV2LayoutClone(StructureV2Layout): + def __init__(self, model): + self._model = model + + clone_model = StructureV2LayoutClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: bboxes + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of bboxes list + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @preprocessor.setter + def preprocessor(self, value): + self._model.preprocessor = value + + @property + def postprocessor(self): + return self._model.postprocessor + + @postprocessor.setter + def postprocessor(self, value): + self._model.postprocessor = value + + +class PPOCRv4(UltraInferModel): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + """Consruct a pipeline with text detector, direction classifier and text recognizer models + + :param det_model: (UltraInferModel) The detection model object created by ultrainfer.vision.ocr.DBDetector. + :param cls_model: (UltraInferModel) The classification model object created by ultrainfer.vision.ocr.Classifier. + :param rec_model: (UltraInferModel) The recognition model object created by ultrainfer.vision.ocr.Recognizer. + """ + assert ( + det_model is not None and rec_model is not None + ), "The det_model and rec_model cannot be None." + + self.det_model = det_model + self.rec_model = rec_model + self.cls_model = cls_model + + if cls_model is None: + self.system_ = C.vision.ocr.PPOCRv4(det_model._model, rec_model._model) + else: + self.system_ = C.vision.ocr.PPOCRv4( + det_model._model, cls_model._model, rec_model._model + ) + + def clone(self): + """Clone PPOCRv4 pipeline object + :return: a new PPOCRv4 pipeline object + """ + + class PPOCRv4Clone(PPOCRv4): + def __init__(self, system): + self.system_ = system + + clone_model = PPOCRv4Clone(self.system_.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: OCRResult + """ + return self.system_.predict(input_image) + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: OCRBatchResult + """ + return self.system_.batch_predict(images) + + @property + def cls_batch_size(self): + return self.system_.cls_batch_size + + @cls_batch_size.setter + def cls_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `cls_batch_size` must be type of int." + self.system_.cls_batch_size = value + + @property + def rec_batch_size(self): + return self.system_.rec_batch_size + + @rec_batch_size.setter + def rec_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `rec_batch_size` must be type of int." 
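+        # Note (sketch): cls_batch_size and rec_batch_size control how many detected
+        # text regions are grouped into one classifier / recognizer inference call,
+        # e.g. `ocr.rec_batch_size = 6` spends memory to gain throughput.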
+ self.system_.rec_batch_size = value + + +class PPOCRSystemv4(PPOCRv4): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + logging.warning( + "DEPRECATED: fd.vision.ocr.PPOCRSystemv4 is deprecated, " + "please use fd.vision.ocr.PPOCRv4 instead." + ) + super(PPOCRSystemv4, self).__init__(det_model, cls_model, rec_model) + + def predict(self, input_image): + return super(PPOCRSystemv4, self).predict(input_image) + + +class PPOCRv3(UltraInferModel): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + """Consruct a pipeline with text detector, direction classifier and text recognizer models + + :param det_model: (UltraInferModel) The detection model object created by ultrainfer.vision.ocr.DBDetector. + :param cls_model: (UltraInferModel) The classification model object created by ultrainfer.vision.ocr.Classifier. + :param rec_model: (UltraInferModel) The recognition model object created by ultrainfer.vision.ocr.Recognizer. + """ + assert ( + det_model is not None and rec_model is not None + ), "The det_model and rec_model cannot be None." + if cls_model is None: + self.system_ = C.vision.ocr.PPOCRv3(det_model._model, rec_model._model) + else: + self.system_ = C.vision.ocr.PPOCRv3( + det_model._model, cls_model._model, rec_model._model + ) + + def clone(self): + """Clone PPOCRv3 pipeline object + :return: a new PPOCRv3 pipeline object + """ + + class PPOCRv3Clone(PPOCRv3): + def __init__(self, system): + self.system_ = system + + clone_model = PPOCRv3Clone(self.system_.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: OCRResult + """ + return self.system_.predict(input_image) + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: OCRBatchResult + """ + return self.system_.batch_predict(images) + + @property + def cls_batch_size(self): + return self.system_.cls_batch_size + + @cls_batch_size.setter + def cls_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `cls_batch_size` must be type of int." + self.system_.cls_batch_size = value + + @property + def rec_batch_size(self): + return self.system_.rec_batch_size + + @rec_batch_size.setter + def rec_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `rec_batch_size` must be type of int." + self.system_.rec_batch_size = value + + +class PPOCRSystemv3(PPOCRv3): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + logging.warning( + "DEPRECATED: fd.vision.ocr.PPOCRSystemv3 is deprecated, " + "please use fd.vision.ocr.PPOCRv3 instead." + ) + super(PPOCRSystemv3, self).__init__(det_model, cls_model, rec_model) + + def predict(self, input_image): + return super(PPOCRSystemv3, self).predict(input_image) + + +class PPOCRv2(UltraInferModel): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + """Consruct a pipeline with text detector, direction classifier and text recognizer models + + :param det_model: (UltraInferModel) The detection model object created by ultrainfer.vision.ocr.DBDetector. + :param cls_model: (UltraInferModel) The classification model object created by ultrainfer.vision.ocr.Classifier. + :param rec_model: (UltraInferModel) The recognition model object created by ultrainfer.vision.ocr.Recognizer. 
+ """ + assert ( + det_model is not None and rec_model is not None + ), "The det_model and rec_model cannot be None." + if cls_model is None: + self.system_ = C.vision.ocr.PPOCRv2(det_model._model, rec_model._model) + else: + self.system_ = C.vision.ocr.PPOCRv2( + det_model._model, cls_model._model, rec_model._model + ) + + def clone(self): + """Clone PPOCRv3 pipeline object + :return: a new PPOCRv3 pipeline object + """ + + class PPOCRv2Clone(PPOCRv2): + def __init__(self, system): + self.system_ = system + + clone_model = PPOCRv2Clone(self.system_.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: OCRResult + """ + return self.system_.predict(input_image) + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: OCRBatchResult + """ + + return self.system_.batch_predict(images) + + @property + def cls_batch_size(self): + return self.system_.cls_batch_size + + @cls_batch_size.setter + def cls_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `cls_batch_size` must be type of int." + self.system_.cls_batch_size = value + + @property + def rec_batch_size(self): + return self.system_.rec_batch_size + + @rec_batch_size.setter + def rec_batch_size(self, value): + assert isinstance( + value, int + ), "The value to set `rec_batch_size` must be type of int." + self.system_.rec_batch_size = value + + +class PPOCRSystemv2(PPOCRv2): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + logging.warning( + "DEPRECATED: fd.vision.ocr.PPOCRSystemv2 is deprecated, " + "please use fd.vision.ocr.PPOCRv2 instead." + ) + super(PPOCRSystemv2, self).__init__(det_model, cls_model, rec_model) + + def predict(self, input_image): + return super(PPOCRSystemv2, self).predict(input_image) + + +class PPStructureV2Table(UltraInferModel): + def __init__(self, det_model=None, rec_model=None, table_model=None): + """Consruct a pipeline with text detector, text recognizer and table recognizer models + + :param det_model: (UltraInferModel) The detection model object created by ultrainfer.vision.ocr.DBDetector. + :param rec_model: (UltraInferModel) The recognition model object created by ultrainfer.vision.ocr.Recognizer. + :param table_model: (UltraInferModel) The table recognition model object created by ultrainfer.vision.ocr.Table. + """ + assert ( + det_model is not None and rec_model is not None and table_model is not None + ), "The det_model, rec_model and table_model cannot be None." 
+ self.system_ = C.vision.ocr.PPStructureV2Table( + det_model._model, + rec_model._model, + table_model._model, + ) + + def clone(self): + """Clone PPStructureV2Table pipeline object + :return: a new PPStructureV2Table pipeline object + """ + + class PPStructureV2TableClone(PPStructureV2Table): + def __init__(self, system): + self.system_ = system + + clone_model = PPStructureV2TableClone(self.system_.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: OCRResult + """ + return self.system_.predict(input_image) + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: OCRBatchResult + """ + + return self.system_.batch_predict(images) + + +class PPStructureV2TableSystem(PPStructureV2Table): + def __init__(self, det_model=None, rec_model=None, table_model=None): + logging.warning( + "DEPRECATED: fd.vision.ocr.PPStructureV2TableSystem is deprecated, " + "please use fd.vision.ocr.PPStructureV2Table instead." + ) + super(PPStructureV2TableSystem, self).__init__( + det_model, rec_model, table_model + ) + + def predict(self, input_image): + return super(PPStructureV2TableSystem, self).predict(input_image) + + +class StructureV2SERViLayoutXLMModelPreprocessor: + def __init__(self, ser_dict_path, use_gpu=True): + """Create a preprocessor for Ser-Vi-LayoutXLM model. + :param: ser_dict_path: (str) class file path + :param: use_gpu: (bool) whether use gpu to OCR process + """ + self._manager = None + from paddleocr import PaddleOCR + + self.ocr_engine = PaddleOCR( + use_angle_cls=False, + det_model_dir=None, + rec_model_dir=None, + show_log=False, + use_gpu=use_gpu, + ) + + pre_process_list = [ + { + "VQATokenLabelEncode": { + "class_path": ser_dict_path, + "contains_re": False, + "ocr_engine": self.ocr_engine, + "order_method": "tb-yx", + } + }, + {"VQATokenPad": {"max_seq_len": 512, "return_attention_mask": True}}, + {"VQASerTokenChunk": {"max_seq_len": 512, "return_attention_mask": True}}, + {"Resize": {"size": [224, 224]}}, + { + "NormalizeImage": { + "std": [58.395, 57.12, 57.375], + "mean": [123.675, 116.28, 103.53], + "scale": "1", + "order": "hwc", + } + }, + {"ToCHWImage": None}, + { + "KeepKeys": { + "keep_keys": [ + "input_ids", + "bbox", + "attention_mask", + "token_type_ids", + "image", + "labels", + "segment_offset_id", + "ocr_info", + "entities", + ] + } + }, + ] + + self.preprocess_op = create_operators(pre_process_list, {"infer_mode": True}) + + def _transform(self, data, ops=None): + """transform""" + if ops is None: + ops = [] + for op in ops: + data = op(data) + if data is None: + return None + return data + + def run(self, input_im): + """Run preprocess of Ser-Vi-LayoutXLM model + :param: input_ims: (numpy.ndarray) input image + """ + ori_im = input_im.copy() + data = {"image": input_im} + data = transform(data, self.preprocess_op) + + for idx in range(len(data)): + if isinstance(data[idx], np.ndarray): + data[idx] = np.expand_dims(data[idx], axis=0) + else: + data[idx] = [data[idx]] + + return data + + +class StructureV2SERViLayoutXLMModelPostprocessor: + def __init__(self, class_path): + """Create a postprocessor for Ser-Vi-LayoutXLM model. 
+ :param: class_path: (string) class file path + """ + self.postprocessor_op = VQASerTokenLayoutLMPostProcess(class_path) + + def run(self, preds, batch=None, *args, **kwargs): + """Run postprocess of Ser-Vi-LayoutXLM model. + :param: preds: (list) results of infering + """ + return self.postprocessor_op(preds, batch, *args, **kwargs) + + +class StructureV2SERViLayoutXLMModel(UltraInferModel): + def __init__( + self, + model_file, + params_file, + ser_dict_path, + class_path, + config_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load SERViLayoutXLM model provided by PP-StructureV2. + + :param model_file: (str)Path of model file, e.g ./ser_vi_layout_xlm/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ser_vi_layout_xlm/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param ser_dict_path: (str) class file path + :param class_path: (str) class file path + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model. + """ + super(StructureV2SERViLayoutXLMModel, self).__init__(runtime_option) + + assert ( + self._runtime_option.backend != 0 + ), "Runtime Option required backend setting." + self._model = C.vision.ocr.StructureV2SERViLayoutXLMModel( + model_file, params_file, config_file, self._runtime_option, model_format + ) + + assert self.initialized, "SERViLayoutXLM model initialize failed." + + self.preprocessor = StructureV2SERViLayoutXLMModelPreprocessor(ser_dict_path) + self.postprocesser = StructureV2SERViLayoutXLMModelPostprocessor(class_path) + + self.input_name_0 = self._model.get_input_info(0).name + self.input_name_1 = self._model.get_input_info(1).name + self.input_name_2 = self._model.get_input_info(2).name + self.input_name_3 = self._model.get_input_info(3).name + + def predict(self, image): + assert isinstance(image, np.ndarray), "predict recives numpy.ndarray(BGR)" + + data = self.preprocessor.run(image) + infer_input = { + self.input_name_0: data[0], + self.input_name_1: data[1], + self.input_name_2: data[2], + self.input_name_3: data[3], + } + + infer_result = self._model.infer(infer_input) + infer_result = infer_result[0] + + post_result = self.postprocesser.run( + infer_result, segment_offset_ids=data[6], ocr_infos=data[7] + ) + + return post_result + + def batch_predict(self, image_list): + assert isinstance(image_list, list) and isinstance( + image_list[0], np.ndarray + ), "batch_predict recives list of numpy.ndarray(BGR)" + + # reading and preprocessing images + datas = None + for image in image_list: + data = self.preprocessor.run(image) + + # concatenate data to batch + if datas == None: + datas = data + else: + for idx in range(len(data)): + if isinstance(data[idx], np.ndarray): + datas[idx] = np.concatenate((datas[idx], data[idx]), axis=0) + else: + datas[idx].extend(data[idx]) + + # infer + infer_inputs = { + self.input_name_0: datas[0], + self.input_name_1: datas[1], + self.input_name_2: datas[2], + self.input_name_3: datas[3], + } + + infer_results = self._model.infer(infer_inputs) + infer_results = infer_results[0] + + # postprocessing + post_results = self.postprocesser.run( + infer_results, segment_offset_ids=datas[6], ocr_infos=datas[7] + ) + + return post_results + + +class PyOnlyFormulaRecognitionModel(PyOnlyVisionModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + 
model_format=ModelFormat.PADDLE,
+    ):
+        self._model_file = model_file
+        self._params_file = params_file
+        self._model_format = model_format
+        super().__init__(runtime_option)
+        self._config = load_config(config_file)
+        self._preprocessor = _PyOnlyFormulaRecognitionPreprocessor()
+        self._postprocessor = _PyOnlyFormulaRecognitionPostprocessor(
+            **self._config["PostProcess"]
+        )
+
+    def model_name(self):
+        return "PyOnlyFormulaRecognitionModel"
+
+    def batch_predict(self, imgs):
+        data_list = []
+        for img in imgs:
+            data = {"img": img}
+            data = self._preprocessor.run(data)
+            data_list.append(data)
+
+        input_name = self._runtime.get_input_info(0).name
+        imgs = np.stack([data["img"] for data in data_list], axis=0, dtype=np.float32)
+        imgs = np.ascontiguousarray(imgs)
+        output_arrs = self._runtime.infer({input_name: imgs})
+
+        results = []
+        for score_map in output_arrs[0]:
+            data = {"score_map": score_map}
+            result = self._postprocessor.run(data)
+            results.append(result)
+        return results
+
+    def _update_option(self):
+        self._option.set_model_path(
+            self._model_file, self._params_file, self._model_format
+        )
+
+
+class _PyOnlyFormulaRecognitionPreprocessor(object):
+    def __init__(self):
+        super().__init__()
+        processors = self._build_processors()
+        self._processor_chain = PyOnlyProcessorChain(processors)
+
+    def run(self, data):
+        return self._processor_chain(data)
+
+    def _build_processors(self):
+        processors = []
+        processors.append(P.LaTeXOCRReisizeNormImg())
+        return processors
+
+
+class _PyOnlyFormulaRecognitionPostprocessor(object):
+    def __init__(self, **kwargs):
+        super().__init__()
+        if kwargs.get("name") == "LaTeXOCRDecode":
+            self.op = LaTeXOCRDecode(
+                character_list=kwargs.get("character_dict"),
+            )
+        else:
+            raise Exception(
+                "Unsupported postprocessing operator: {}".format(kwargs.get("name"))
+            )
+
+    def run(self, data):
+        rec_text = self.op.apply(data)
+        rec_text = rec_text["rec_text"]
+        result = _PyOnlyFormulaRecognitionResult(rec_text=rec_text)
+        return result
+
+
+@dataclass
+class _PyOnlyFormulaRecognitionResult(object):
+    rec_text: str
+
+
+class LaTeXOCRDecode(object):
+    def __init__(self, character_list=None):
+        super().__init__()
+        temp_path = tempfile.gettempdir()
+        rec_char_dict_path = os.path.join(temp_path, "latexocr_tokenizer.json")
+        try:
+            with open(rec_char_dict_path, "w") as f:
+                json.dump(character_list, f)
+        except Exception as e:
+            print(f"Failed to create latexocr_tokenizer.json, reason: {str(e)}")
+        self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
+
+    def post_process(self, s):
+        text_reg = r"(\\(operatorname|mathrm|text|mathbf)\s?\*?
{.*?})" + letter = "[a-zA-Z]" + noletter = "[\W_^\d]" + names = [x[0].replace(" ", "") for x in re.findall(text_reg, s)] + s = re.sub(text_reg, lambda match: str(names.pop(0)), s) + news = s + while True: + s = news + news = re.sub(r"(?!\\ )(%s)\s+?(%s)" % (noletter, noletter), r"\1\2", s) + news = re.sub(r"(?!\\ )(%s)\s+?(%s)" % (noletter, letter), r"\1\2", news) + news = re.sub(r"(%s)\s+?(%s)" % (letter, noletter), r"\1\2", news) + if news == s: + break + return s + + def decode(self, tokens): + if len(tokens.shape) == 1: + tokens = tokens[None, :] + + dec = [self.tokenizer.decode(tok) for tok in tokens] + dec_str_list = [ + "".join(detok.split(" ")) + .replace("Ġ", " ") + .replace("[EOS]", "") + .replace("[BOS]", "") + .replace("[PAD]", "") + .strip() + for detok in dec + ] + return [str(self.post_process(dec_str)) for dec_str in dec_str_list] + + def apply(self, pred): + key = next(iter(pred)) + preds = np.array(pred[key]) + text = self.decode(preds) + return {"rec_text": text[0]} diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/__init__.py new file mode 100755 index 0000000000..59372f9379 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/__init__.py new file mode 100755 index 0000000000..59372f9379 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/operators.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/operators.py new file mode 100755 index 0000000000..7d42cf3a5c --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/operators.py @@ -0,0 +1,104 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import cv2 +import numpy as np + + +class Resize(object): + def __init__(self, size=(640, 640), **kwargs): + self.size = size + + def resize_image(self, img): + resize_h, resize_w = self.size + ori_h, ori_w = img.shape[:2] # (h, w, c) + ratio_h = float(resize_h) / ori_h + ratio_w = float(resize_w) / ori_w + img = cv2.resize(img, (int(resize_w), int(resize_h))) + return img, [ratio_h, ratio_w] + + def __call__(self, data): + img = data["image"] + if "polys" in data: + text_polys = data["polys"] + + img_resize, [ratio_h, ratio_w] = self.resize_image(img) + if "polys" in data: + new_boxes = [] + for box in text_polys: + new_box = [] + for cord in box: + new_box.append([cord[0] * ratio_w, cord[1] * ratio_h]) + new_boxes.append(new_box) + data["polys"] = np.array(new_boxes, dtype=np.float32) + data["image"] = img_resize + return data + + +class NormalizeImage(object): + """normalize image such as substract mean, divide std""" + + def __init__(self, scale=None, mean=None, std=None, order="chw", **kwargs): + if isinstance(scale, str): + scale = eval(scale) + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if order == "chw" else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype("float32") + self.std = np.array(std).reshape(shape).astype("float32") + + def __call__(self, data): + img = data["image"] + from PIL import Image + + if isinstance(img, Image.Image): + img = np.array(img) + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + data["image"] = (img.astype("float32") * self.scale - self.mean) / self.std + return data + + +class ToCHWImage(object): + """convert hwc image to chw image""" + + def __init__(self, **kwargs): + pass + + def __call__(self, data): + img = data["image"] + from PIL import Image + + if isinstance(img, Image.Image): + img = np.array(img) + data["image"] = img.transpose((2, 0, 1)) + return data + + +class KeepKeys(object): + def __init__(self, keep_keys, **kwargs): + self.keep_keys = keep_keys + + def __call__(self, data): + data_list = [] + for key in self.keep_keys: + data_list.append(data[key]) + return data_list diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/transforms.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/transforms.py new file mode 100755 index 0000000000..32ec94cf80 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/transforms.py @@ -0,0 +1,47 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .vqa_utils import * +from .operators import * + + +def transform(data, ops=None): + """transform""" + if ops is None: + ops = [] + for op in ops: + data = op(data) + if data is None: + return None + return data + + +def create_operators(op_param_list, global_config=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(op_param_list, list), "operator config should be a list" + ops = [] + for operator in op_param_list: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + if global_config is not None: + param.update(global_config) + op = eval(op_name)(**param) + ops.append(op) + return ops diff --git a/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/vqa_utils.py b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/vqa_utils.py new file mode 100755 index 0000000000..4e9627301e --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/ocr/ppocr/utils/ser_vi_layoutxlm/vqa_utils.py @@ -0,0 +1,624 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
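# [Editor's illustrative sketch -- not part of the patch] The `create_operators` and
# `transform` helpers in transforms.py above instantiate the operator classes from
# operators.py by name and run them in sequence. A minimal, hypothetical pipeline
# (import path, image path, and config values are assumptions) might look like:

import cv2

from ultrainfer.vision.ocr.ppocr.utils.ser_vi_layoutxlm.transforms import (
    create_operators,
    transform,
)

ops = create_operators(
    [
        {"Resize": {"size": [640, 640]}},
        {"NormalizeImage": {"order": "hwc"}},
        {"ToCHWImage": None},
        {"KeepKeys": {"keep_keys": ["image"]}},
    ]
)
data = {"image": cv2.imread("doc_image.png")}  # hypothetical input, HWC BGR
(chw_image,) = transform(data, ops)  # float32 CHW array, resized and normalized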
+ +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import copy +import numpy as np +import json +import copy +from copy import deepcopy + +from collections import defaultdict + + +def order_by_tbyx(ocr_info): + res = sorted(ocr_info, key=lambda r: (r["bbox"][1], r["bbox"][0])) + for i in range(len(res) - 1): + for j in range(i, 0, -1): + if abs(res[j + 1]["bbox"][1] - res[j]["bbox"][1]) < 20 and ( + res[j + 1]["bbox"][0] < res[j]["bbox"][0] + ): + tmp = deepcopy(res[j]) + res[j] = deepcopy(res[j + 1]) + res[j + 1] = deepcopy(tmp) + else: + break + return res + + +def load_vqa_bio_label_maps(label_map_path): + with open(label_map_path, "r", encoding="utf-8") as fin: + lines = fin.readlines() + old_lines = [line.strip() for line in lines] + lines = ["O"] + for line in old_lines: + # "O" has already been in lines + if line.upper() in ["OTHER", "OTHERS", "IGNORE"]: + continue + lines.append(line) + labels = ["O"] + for line in lines[1:]: + labels.append("B-" + line) + labels.append("I-" + line) + label2id_map = {label.upper(): idx for idx, label in enumerate(labels)} + id2label_map = {idx: label.upper() for idx, label in enumerate(labels)} + return label2id_map, id2label_map + + +class VQATokenLabelEncode(object): + """ + Label encode for NLP VQA methods + """ + + def __init__( + self, + class_path, + contains_re=False, + add_special_ids=False, + algorithm="LayoutXLM", + use_textline_bbox_info=True, + order_method=None, + infer_mode=False, + ocr_engine=None, + **kwargs + ): + super(VQATokenLabelEncode, self).__init__() + from paddlenlp.transformers import ( + LayoutXLMTokenizer, + LayoutLMTokenizer, + LayoutLMv2Tokenizer, + ) + + tokenizer_dict = { + "LayoutXLM": { + "class": LayoutXLMTokenizer, + "pretrained_model": "layoutxlm-base-uncased", + }, + "LayoutLM": { + "class": LayoutLMTokenizer, + "pretrained_model": "layoutlm-base-uncased", + }, + "LayoutLMv2": { + "class": LayoutLMv2Tokenizer, + "pretrained_model": "layoutlmv2-base-uncased", + }, + } + self.contains_re = contains_re + tokenizer_config = tokenizer_dict[algorithm] + self.tokenizer = tokenizer_config["class"].from_pretrained( + tokenizer_config["pretrained_model"] + ) + self.label2id_map, id2label_map = load_vqa_bio_label_maps(class_path) + self.add_special_ids = add_special_ids + self.infer_mode = infer_mode + self.ocr_engine = ocr_engine + self.use_textline_bbox_info = use_textline_bbox_info + self.order_method = order_method + assert self.order_method in [None, "tb-yx"] + + def split_bbox(self, bbox, text, tokenizer): + words = text.split() + token_bboxes = [] + curr_word_idx = 0 + x1, y1, x2, y2 = bbox + unit_w = (x2 - x1) / len(text) + for idx, word in enumerate(words): + curr_w = len(word) * unit_w + word_bbox = [x1, y1, x1 + curr_w, y2] + token_bboxes.extend([word_bbox] * len(tokenizer.tokenize(word))) + x1 += (len(word) + 1) * unit_w + return token_bboxes + + def filter_empty_contents(self, ocr_info): + """ + find out the empty texts and remove the links + """ + new_ocr_info = [] + empty_index = [] + for idx, info in enumerate(ocr_info): + if len(info["transcription"]) > 0: + new_ocr_info.append(copy.deepcopy(info)) + else: + empty_index.append(info["id"]) + + for idx, info in enumerate(new_ocr_info): + new_link = [] + for link in info["linking"]: + if link[0] in empty_index or link[1] in empty_index: + continue + new_link.append(link) + new_ocr_info[idx]["linking"] = new_link + return new_ocr_info + + def __call__(self, data): + # load bbox and label info 
+ ocr_info = self._load_ocr_info(data) + + for idx in range(len(ocr_info)): + if "bbox" not in ocr_info[idx]: + ocr_info[idx]["bbox"] = self.trans_poly_to_bbox(ocr_info[idx]["points"]) + + if self.order_method == "tb-yx": + ocr_info = order_by_tbyx(ocr_info) + + # for re + train_re = self.contains_re and not self.infer_mode + if train_re: + ocr_info = self.filter_empty_contents(ocr_info) + + height, width, _ = data["image"].shape + + words_list = [] + bbox_list = [] + input_ids_list = [] + token_type_ids_list = [] + segment_offset_id = [] + gt_label_list = [] + + entities = [] + + if train_re: + relations = [] + id2label = {} + entity_id_to_index_map = {} + empty_entity = set() + + data["ocr_info"] = copy.deepcopy(ocr_info) + + for info in ocr_info: + text = info["transcription"] + if len(text) <= 0: + continue + if train_re: + # for re + if len(text) == 0: + empty_entity.add(info["id"]) + continue + id2label[info["id"]] = info["label"] + relations.extend([tuple(sorted(l)) for l in info["linking"]]) + # smooth_box + info["bbox"] = self.trans_poly_to_bbox(info["points"]) + + encode_res = self.tokenizer.encode( + text, + pad_to_max_seq_len=False, + return_attention_mask=True, + return_token_type_ids=True, + ) + + if not self.add_special_ids: + # TODO: use tok.all_special_ids to remove + encode_res["input_ids"] = encode_res["input_ids"][1:-1] + encode_res["token_type_ids"] = encode_res["token_type_ids"][1:-1] + encode_res["attention_mask"] = encode_res["attention_mask"][1:-1] + + if self.use_textline_bbox_info: + bbox = [info["bbox"]] * len(encode_res["input_ids"]) + else: + bbox = self.split_bbox( + info["bbox"], info["transcription"], self.tokenizer + ) + if len(bbox) <= 0: + continue + bbox = self._smooth_box(bbox, height, width) + if self.add_special_ids: + bbox.insert(0, [0, 0, 0, 0]) + bbox.append([0, 0, 0, 0]) + + # parse label + if not self.infer_mode: + label = info["label"] + gt_label = self._parse_label(label, encode_res) + + # construct entities for re + if train_re: + if gt_label[0] != self.label2id_map["O"]: + entity_id_to_index_map[info["id"]] = len(entities) + label = label.upper() + entities.append( + { + "start": len(input_ids_list), + "end": len(input_ids_list) + len(encode_res["input_ids"]), + "label": label.upper(), + } + ) + else: + entities.append( + { + "start": len(input_ids_list), + "end": len(input_ids_list) + len(encode_res["input_ids"]), + "label": "O", + } + ) + input_ids_list.extend(encode_res["input_ids"]) + token_type_ids_list.extend(encode_res["token_type_ids"]) + bbox_list.extend(bbox) + words_list.append(text) + segment_offset_id.append(len(input_ids_list)) + if not self.infer_mode: + gt_label_list.extend(gt_label) + + data["input_ids"] = input_ids_list + data["token_type_ids"] = token_type_ids_list + data["bbox"] = bbox_list + data["attention_mask"] = [1] * len(input_ids_list) + data["labels"] = gt_label_list + data["segment_offset_id"] = segment_offset_id + data["tokenizer_params"] = dict( + padding_side=self.tokenizer.padding_side, + pad_token_type_id=self.tokenizer.pad_token_type_id, + pad_token_id=self.tokenizer.pad_token_id, + ) + data["entities"] = entities + + if train_re: + data["relations"] = relations + data["id2label"] = id2label + data["empty_entity"] = empty_entity + data["entity_id_to_index_map"] = entity_id_to_index_map + return data + + def trans_poly_to_bbox(self, poly): + x1 = int(np.min([p[0] for p in poly])) + x2 = int(np.max([p[0] for p in poly])) + y1 = int(np.min([p[1] for p in poly])) + y2 = int(np.max([p[1] for p in poly])) + 
return [x1, y1, x2, y2] + + def _load_ocr_info(self, data): + if self.infer_mode: + ocr_result = self.ocr_engine.ocr(data["image"], cls=False)[0] + ocr_info = [] + for res in ocr_result: + ocr_info.append( + { + "transcription": res[1][0], + "bbox": self.trans_poly_to_bbox(res[0]), + "points": res[0], + } + ) + return ocr_info + else: + info = data["label"] + # read text info + info_dict = json.loads(info) + return info_dict + + def _smooth_box(self, bboxes, height, width): + bboxes = np.array(bboxes) + bboxes[:, 0] = bboxes[:, 0] * 1000 / width + bboxes[:, 2] = bboxes[:, 2] * 1000 / width + bboxes[:, 1] = bboxes[:, 1] * 1000 / height + bboxes[:, 3] = bboxes[:, 3] * 1000 / height + bboxes = bboxes.astype("int64").tolist() + return bboxes + + def _parse_label(self, label, encode_res): + gt_label = [] + if label.lower() in ["other", "others", "ignore"]: + gt_label.extend([0] * len(encode_res["input_ids"])) + else: + gt_label.append(self.label2id_map[("b-" + label).upper()]) + gt_label.extend( + [self.label2id_map[("i-" + label).upper()]] + * (len(encode_res["input_ids"]) - 1) + ) + return gt_label + + +class VQATokenPad(object): + def __init__( + self, + max_seq_len=512, + pad_to_max_seq_len=True, + return_attention_mask=True, + return_token_type_ids=True, + truncation_strategy="longest_first", + return_overflowing_tokens=False, + return_special_tokens_mask=False, + infer_mode=False, + **kwargs + ): + + self.max_seq_len = max_seq_len + self.pad_to_max_seq_len = max_seq_len + self.return_attention_mask = return_attention_mask + self.return_token_type_ids = return_token_type_ids + self.truncation_strategy = truncation_strategy + self.return_overflowing_tokens = return_overflowing_tokens + self.return_special_tokens_mask = return_special_tokens_mask + self.infer_mode = infer_mode + + def __call__(self, data): + import paddle + + self.pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index + needs_to_be_padded = ( + self.pad_to_max_seq_len and len(data["input_ids"]) < self.max_seq_len + ) + + if needs_to_be_padded: + if "tokenizer_params" in data: + tokenizer_params = data.pop("tokenizer_params") + else: + tokenizer_params = dict( + padding_side="right", pad_token_type_id=0, pad_token_id=1 + ) + + difference = self.max_seq_len - len(data["input_ids"]) + if tokenizer_params["padding_side"] == "right": + if self.return_attention_mask: + data["attention_mask"] = [1] * len(data["input_ids"]) + [ + 0 + ] * difference + if self.return_token_type_ids: + data["token_type_ids"] = ( + data["token_type_ids"] + + [tokenizer_params["pad_token_type_id"]] * difference + ) + if self.return_special_tokens_mask: + data["special_tokens_mask"] = ( + data["special_tokens_mask"] + [1] * difference + ) + data["input_ids"] = ( + data["input_ids"] + [tokenizer_params["pad_token_id"]] * difference + ) + if not self.infer_mode: + data["labels"] = ( + data["labels"] + [self.pad_token_label_id] * difference + ) + data["bbox"] = data["bbox"] + [[0, 0, 0, 0]] * difference + elif tokenizer_params["padding_side"] == "left": + if self.return_attention_mask: + data["attention_mask"] = [0] * difference + [1] * len( + data["input_ids"] + ) + if self.return_token_type_ids: + data["token_type_ids"] = [ + tokenizer_params["pad_token_type_id"] + ] * difference + data["token_type_ids"] + if self.return_special_tokens_mask: + data["special_tokens_mask"] = [1] * difference + data[ + "special_tokens_mask" + ] + data["input_ids"] = [ + tokenizer_params["pad_token_id"] + ] * difference + data["input_ids"] + if not self.infer_mode: + 
data["labels"] = [self.pad_token_label_id] * difference + data[ + "labels" + ] + data["bbox"] = [[0, 0, 0, 0]] * difference + data["bbox"] + else: + if self.return_attention_mask: + data["attention_mask"] = [1] * len(data["input_ids"]) + + for key in data: + if key in [ + "input_ids", + "labels", + "token_type_ids", + "bbox", + "attention_mask", + ]: + if self.infer_mode: + if key != "labels": + length = min(len(data[key]), self.max_seq_len) + data[key] = data[key][:length] + else: + continue + data[key] = np.array(data[key], dtype="int64") + return data + + +class VQASerTokenChunk(object): + def __init__(self, max_seq_len=512, infer_mode=False, **kwargs): + self.max_seq_len = max_seq_len + self.infer_mode = infer_mode + + def __call__(self, data): + encoded_inputs_all = [] + seq_len = len(data["input_ids"]) + for index in range(0, seq_len, self.max_seq_len): + chunk_beg = index + chunk_end = min(index + self.max_seq_len, seq_len) + encoded_inputs_example = {} + for key in data: + if key in [ + "label", + "input_ids", + "labels", + "token_type_ids", + "bbox", + "attention_mask", + ]: + if self.infer_mode and key == "labels": + encoded_inputs_example[key] = data[key] + else: + encoded_inputs_example[key] = data[key][chunk_beg:chunk_end] + else: + encoded_inputs_example[key] = data[key] + + encoded_inputs_all.append(encoded_inputs_example) + if len(encoded_inputs_all) == 0: + return None + return encoded_inputs_all[0] + + +class VQAReTokenChunk(object): + def __init__( + self, max_seq_len=512, entities_labels=None, infer_mode=False, **kwargs + ): + self.max_seq_len = max_seq_len + self.entities_labels = ( + {"HEADER": 0, "QUESTION": 1, "ANSWER": 2} + if entities_labels is None + else entities_labels + ) + self.infer_mode = infer_mode + + def __call__(self, data): + # prepare data + entities = data.pop("entities") + relations = data.pop("relations") + encoded_inputs_all = [] + for index in range(0, len(data["input_ids"]), self.max_seq_len): + item = {} + for key in data: + if key in [ + "label", + "input_ids", + "labels", + "token_type_ids", + "bbox", + "attention_mask", + ]: + if self.infer_mode and key == "labels": + item[key] = data[key] + else: + item[key] = data[key][index : index + self.max_seq_len] + else: + item[key] = data[key] + # select entity in current chunk + entities_in_this_span = [] + global_to_local_map = {} # + for entity_id, entity in enumerate(entities): + if ( + index <= entity["start"] < index + self.max_seq_len + and index <= entity["end"] < index + self.max_seq_len + ): + entity["start"] = entity["start"] - index + entity["end"] = entity["end"] - index + global_to_local_map[entity_id] = len(entities_in_this_span) + entities_in_this_span.append(entity) + + # select relations in current chunk + relations_in_this_span = [] + for relation in relations: + if ( + index <= relation["start_index"] < index + self.max_seq_len + and index <= relation["end_index"] < index + self.max_seq_len + ): + relations_in_this_span.append( + { + "head": global_to_local_map[relation["head"]], + "tail": global_to_local_map[relation["tail"]], + "start_index": relation["start_index"] - index, + "end_index": relation["end_index"] - index, + } + ) + item.update( + { + "entities": self.reformat(entities_in_this_span), + "relations": self.reformat(relations_in_this_span), + } + ) + if len(item["entities"]) > 0: + item["entities"]["label"] = [ + self.entities_labels[x] for x in item["entities"]["label"] + ] + encoded_inputs_all.append(item) + if len(encoded_inputs_all) == 0: + return None + return 
encoded_inputs_all[0] + + def reformat(self, data): + new_data = defaultdict(list) + for item in data: + for k, v in item.items(): + new_data[k].append(v) + return new_data + + +class VQASerTokenLayoutLMPostProcess(object): + """Convert between text-label and text-index""" + + def __init__(self, class_path, **kwargs): + super(VQASerTokenLayoutLMPostProcess, self).__init__() + label2id_map, self.id2label_map = load_vqa_bio_label_maps(class_path) + + self.label2id_map_for_draw = dict() + for key in label2id_map: + if key.startswith("I-"): + self.label2id_map_for_draw[key] = label2id_map["B" + key[1:]] + else: + self.label2id_map_for_draw[key] = label2id_map[key] + + self.id2label_map_for_show = dict() + for key in self.label2id_map_for_draw: + val = self.label2id_map_for_draw[key] + if key == "O": + self.id2label_map_for_show[val] = key + if key.startswith("B-") or key.startswith("I-"): + self.id2label_map_for_show[val] = key[2:] + else: + self.id2label_map_for_show[val] = key + + def __call__(self, preds, batch=None, *args, **kwargs): + import paddle + + if isinstance(preds, tuple): + preds = preds[0] + if isinstance(preds, paddle.Tensor): + preds = preds.numpy() + + if batch is not None: + return self._metric(preds, batch[5]) + else: + return self._infer(preds, **kwargs) + + def _metric(self, preds, label): + pred_idxs = preds.argmax(axis=2) + decode_out_list = [[] for _ in range(pred_idxs.shape[0])] + label_decode_out_list = [[] for _ in range(pred_idxs.shape[0])] + + for i in range(pred_idxs.shape[0]): + for j in range(pred_idxs.shape[1]): + if label[i, j] != -100: + label_decode_out_list[i].append(self.id2label_map[label[i, j]]) + decode_out_list[i].append(self.id2label_map[pred_idxs[i, j]]) + return decode_out_list, label_decode_out_list + + def _infer(self, preds, segment_offset_ids, ocr_infos): + results = [] + + for pred, segment_offset_id, ocr_info in zip( + preds, segment_offset_ids, ocr_infos + ): + pred = np.argmax(pred, axis=1) + pred = [self.id2label_map[idx] for idx in pred] + + for idx in range(len(segment_offset_id)): + if idx == 0: + start_id = 0 + else: + start_id = segment_offset_id[idx - 1] + + end_id = segment_offset_id[idx] + + curr_pred = pred[start_id:end_id] + curr_pred = [self.label2id_map_for_draw[p] for p in curr_pred] + + if len(curr_pred) <= 0: + pred_id = 0 + else: + counts = np.bincount(curr_pred) + pred_id = np.argmax(counts) + ocr_info[idx]["pred_id"] = int(pred_id) + ocr_info[idx]["pred"] = self.id2label_map_for_show[int(pred_id)] + results.append(ocr_info) + return results diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/perception/__init__.py new file mode 100755 index 0000000000..56f3b12e70 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/__init__.py @@ -0,0 +1,19 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
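# [Editor's illustrative sketch -- not part of the patch] `load_vqa_bio_label_maps`
# from vqa_utils.py above expands a plain class list into BIO label maps; the class
# names, temporary file, and import path below are assumptions used for illustration.

import tempfile

from ultrainfer.vision.ocr.ppocr.utils.ser_vi_layoutxlm.vqa_utils import (
    load_vqa_bio_label_maps,
)

with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
    f.write("OTHER\nHEADER\nQUESTION\nANSWER\n")  # "OTHER" is folded into the "O" label
    class_path = f.name

label2id, id2label = load_vqa_bio_label_maps(class_path)
# label2id -> {"O": 0, "B-HEADER": 1, "I-HEADER": 2, "B-QUESTION": 3,
#              "I-QUESTION": 4, "B-ANSWER": 5, "I-ANSWER": 6}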
+ +from __future__ import absolute_import +from .paddle3d.smoke import * +from .paddle3d.petr import * +from .paddle3d.centerpoint import * +from .paddle3d.caddn import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/__init__.py new file mode 100755 index 0000000000..4648555840 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/caddn.py b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/caddn.py new file mode 100755 index 0000000000..0f746f45d9 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/caddn.py @@ -0,0 +1,108 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class CaddnPreprocessor: + def __init__(self, config_file): + """Create a preprocessor for Caddn""" + self._preprocessor = C.vision.perception.CaddnPreprocessor(config_file) + + def run(self, input_ims, cam_data, lidar_data): + """Preprocess input images for Caddn + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims, cam_data, lidar_data) + + +class CaddnPostprocessor: + def __init__(self): + """Create a postprocessor for Caddn""" + self._postprocessor = C.vision.perception.CaddnPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for Caddn + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :return: list of PerceptionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class Caddn(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a Caddn model exported by Caddn. 
+ + :param model_file: (str)Path of model file, e.g ./Caddn.pdmodel + :param params_file: (str)Path of parameters file, e.g ./Caddn.pdiparams + :param config_file: (str)Path of config file, e.g ./infer_cfg.yaml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(Caddn, self).__init__(runtime_option) + + self._model = C.vision.perception.Caddn( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "Caddn initialize failed." + + def predict(self, input_image, cam_data, lidar_data): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param: cam_data: (list)The input camera data + :param: lidar_data: (list)The input lidar data + :return: PerceptionResult + """ + return self._model.predict(input_image, cam_data, lidar_data) + + def batch_predict(self, images, cam_data, lidar_data): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :param: cam_data: (list)The input camera data + :param: lidar_data: (list)The input lidar data + :return list of PerceptionResult + """ + + return self._model.batch_predict(images, cam_data, lidar_data) + + @property + def preprocessor(self): + """Get CaddnPreprocessor object of the loaded model + + :return CaddnPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get CaddnPostprocessor object of the loaded model + + :return CaddnPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/centerpoint.py b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/centerpoint.py new file mode 100755 index 0000000000..07f7422be3 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/centerpoint.py @@ -0,0 +1,92 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class CenterpointPreprocessor: + def __init__(self, config_file): + """Create a preprocessor for Centerpoint""" + self._preprocessor = C.vision.perception.CenterpointPreprocessor(config_file) + + def run(self, point_dirs, num_point_dim, with_timelag): + """Preprocess input images for Centerpoint + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(point_dirs, num_point_dim, with_timelag) + + +class Centerpoint(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a Centerpoint model exported by Centerpoint. + + :param model_file: (str)Path of model file, e.g ./Centerpoint.pdmodel + :param params_file: (str)Path of parameters file, e.g ./Centerpoint.pdiparams + :param config_file: (str)Path of config file, e.g ./infer_cfg.yaml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(Centerpoint, self).__init__(runtime_option) + + self._model = C.vision.perception.Centerpoint( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "Centerpoint initialize failed." + + def predict(self, point_dir): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: PerceptionResult + """ + return self._model.predict(point_dir) + + def batch_predict(self, points_dir): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of PerceptionResult + """ + + return self._model.batch_predict(points_dir) + + @property + def preprocessor(self): + """Get CenterpointPreprocessor object of the loaded model + + :return CenterpointPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get CenterpointPostprocessor object of the loaded model + + :return CenterpointPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/petr.py b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/petr.py new file mode 100755 index 0000000000..8ae25c5c47 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/petr.py @@ -0,0 +1,106 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C + + +class PetrPreprocessor: + def __init__(self, config_file): + """Create a preprocessor for Petr""" + self._preprocessor = C.vision.perception.PetrPreprocessor(config_file) + + def run(self, input_ims): + """Preprocess input images for Petr + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class PetrPostprocessor: + def __init__(self): + """Create a postprocessor for Petr""" + self._postprocessor = C.vision.perception.PetrPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for Petr + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :return: list of PerceptionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class Petr(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a SMoke model exported by Petr. + + :param model_file: (str)Path of model file, e.g ./petr.pdmodel + :param params_file: (str)Path of parameters file, e.g ./petr.pdiparams + :param config_file: (str)Path of config file, e.g ./infer_cfg.yaml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(Petr, self).__init__(runtime_option) + + self._model = C.vision.perception.Petr( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "Petr initialize failed." + + def predict(self, input_image): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: PerceptionResult + """ + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of PerceptionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get PetrPreprocessor object of the loaded model + + :return PetrPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PetrPostprocessor object of the loaded model + + :return PetrPostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/smoke.py b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/smoke.py new file mode 100755 index 0000000000..d31159cb0f --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/perception/paddle3d/smoke.py @@ -0,0 +1,106 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class SmokePreprocessor: + def __init__(self, config_file): + """Create a preprocessor for Smoke""" + self._preprocessor = C.vision.perception.SmokePreprocessor(config_file) + + def run(self, input_ims): + """Preprocess input images for Smoke + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class SmokePostprocessor: + def __init__(self): + """Create a postprocessor for Smoke""" + self._postprocessor = C.vision.perception.SmokePostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for Smoke + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :return: list of PerceptionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class Smoke(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a SMoke model exported by Smoke. + + :param model_file: (str)Path of model file, e.g ./smoke.pdmodel + :param params_file: (str)Path of parameters file, e.g ./smoke.pdiparams + :param config_file: (str)Path of config file, e.g ./infer_cfg.yaml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(Smoke, self).__init__(runtime_option) + + self._model = C.vision.perception.Smoke( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "Smoke initialize failed." 
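# [Editor's note -- illustrative only, not part of the patch] Once constructed, the
# Smoke model above is typically driven as sketched below; the file paths, the input
# image, and the `ui.vision.perception.Smoke` import path are assumptions.
#
#   import cv2
#   import ultrainfer as ui
#
#   model = ui.vision.perception.Smoke(
#       "smoke/smoke.pdmodel",
#       "smoke/smoke.pdiparams",
#       "smoke/infer_cfg.yaml",
#       runtime_option=ui.RuntimeOption(),
#   )
#   im = cv2.imread("street.jpg")        # HWC, BGR
#   result = model.predict(im)           # -> PerceptionResult
#   results = model.batch_predict([im])  # -> list of PerceptionResult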
+ + def predict(self, input_image): + """Detect an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 + :return: PerceptionResult + """ + return self._model.predict(input_image) + + def batch_predict(self, images): + """Classify a batch of input image + + :param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of PerceptionResult + """ + + return self._model.batch_predict(images) + + @property + def preprocessor(self): + """Get SmokePreprocessor object of the loaded model + + :return SmokePreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get SmokePostprocessor object of the loaded model + + :return SmokePostprocessor + """ + return self._model.postprocessor diff --git a/libs/ultrainfer/python/ultrainfer/vision/segmentation/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/segmentation/__init__.py new file mode 100755 index 0000000000..9e54a8c5f3 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/segmentation/__init__.py @@ -0,0 +1,16 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .ppseg import * diff --git a/libs/ultrainfer/python/ultrainfer/vision/segmentation/ppseg/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/segmentation/ppseg/__init__.py new file mode 100755 index 0000000000..e0a36b960c --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/segmentation/ppseg/__init__.py @@ -0,0 +1,321 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import logging +from dataclasses import dataclass +from typing import List + +import numpy as np +from skimage import morphology + +from .... import UltraInferModel, ModelFormat +from .... 
import c_lib_wrap as C +from ...common import ProcessorManager +from ....py_only import PyOnlyProcessorChain +from ....py_only.vision import PyOnlyVisionModel, processors as P +from ....utils.misc import load_config + + +class PaddleSegModel(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a image segmentation model exported by PaddleSeg. + + :param model_file: (str)Path of model file, e.g unet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g unet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str) Path of configuration file for deploy, e.g unet/deploy.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PaddleSegModel, self).__init__(runtime_option) + + # assert model_format == ModelFormat.PADDLE, "PaddleSeg only support model format of ModelFormat.Paddle now." + self._model = C.vision.segmentation.PaddleSegModel( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PaddleSeg model initialize failed." + + def predict(self, image): + """Predict the segmentation result for an input image + + :param im: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: SegmentationResult + """ + return self._model.predict(image) + + def batch_predict(self, image_list): + """Predict the segmentation results for a batch of input images + + :param image_list: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of SegmentationResult + """ + return self._model.batch_predict(image_list) + + def clone(self): + """Clone PaddleSegModel object + + :return: a new PaddleSegModel object + """ + + class PaddleSegCloneModel(PaddleSegModel): + def __init__(self, model): + self._model = model + + clone_model = PaddleSegCloneModel(self._model.clone()) + return clone_model + + @property + def preprocessor(self): + """Get PaddleSegPreprocessor object of the loaded model + + :return: PaddleSegPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get PaddleSegPostprocessor object of the loaded model + + :return: PaddleSegPostprocessor + """ + return self._model.postprocessor + + +class PaddleSegPreprocessor(ProcessorManager): + def __init__(self, config_file): + """Create a preprocessor for PaddleSegModel from configuration file + + :param config_file: (str)Path of configuration file, e.g ppliteseg/deploy.yaml + """ + self._manager = C.vision.segmentation.PaddleSegPreprocessor(config_file) + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._manager.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._manager.disable_permute() + + @property + def is_vertical_screen(self): + """Atrribute of PP-HumanSeg model. 
Stating Whether the input image is vertical image(height > width), default value is False + + :return: value of is_vertical_screen(bool) + """ + return self._manager.is_vertical_screen + + @is_vertical_screen.setter + def is_vertical_screen(self, value): + """Set attribute is_vertical_screen of PP-HumanSeg model. + + :param value: (bool)The value to set is_vertical_screen + """ + assert isinstance( + value, bool + ), "The value to set `is_vertical_screen` must be type of bool." + self._manager.is_vertical_screen = value + + +class PaddleSegPostprocessor: + def __init__(self, config_file): + """Create a postprocessor for PaddleSegModel from configuration file + + :param config_file: (str)Path of configuration file, e.g ppliteseg/deploy.yaml + """ + self._postprocessor = C.vision.segmentation.PaddleSegPostprocessor(config_file) + + def run(self, runtime_results, imgs_info): + """Postprocess the runtime results for PaddleSegModel + + :param runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param imgs_info: The original input images shape info map, key is "shape_info", value is [[image_height, image_width]] + :return: list of SegmentationResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, imgs_info) + + @property + def apply_softmax(self): + """Atrribute of PaddleSeg model. Stating Whether applying softmax operator in the postprocess, default value is False + + :return: value of apply_softmax(bool) + """ + return self._postprocessor.apply_softmax + + @apply_softmax.setter + def apply_softmax(self, value): + """Set attribute apply_softmax of PaddleSeg model. + + :param value: (bool)The value to set apply_softmax + """ + assert isinstance( + value, bool + ), "The value to set `apply_softmax` must be type of bool." + self._postprocessor.apply_softmax = value + + @property + def store_score_map(self): + """Atrribute of PaddleSeg model. Stating Whether storing score map in the SegmentationResult, default value is False + + :return: value of store_score_map(bool) + """ + return self._postprocessor.store_score_map + + @store_score_map.setter + def store_score_map(self, value): + """Set attribute store_score_map of PaddleSeg model. + + :param value: (bool)The value to set store_score_map + """ + assert isinstance( + value, bool + ), "The value to set `store_score_map` must be type of bool." 
+ self._postprocessor.store_score_map = value + + +class PyOnlyAnomalyDetectionModel(PyOnlyVisionModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + self._model_file = model_file + self._params_file = params_file + self._model_format = model_format + super().__init__(runtime_option) + self._config = load_config(config_file) + self._preprocessor = _PyOnlyAnomalyDetectionPreprocessor( + self._config["Deploy"]["transforms"] + ) + self._postprocessor = _PyOnlyAnomalyDetectionPostprocessor() + + def model_name(): + return "PyOnlyImageAnomalyDetectionModel" + + def batch_predict(self, imgs): + data_list = [] + for img in imgs: + data = {"img": img} + data = self._preprocessor.run(data) + data_list.append(data) + + input_name = self._runtime.get_input_info(0).name + imgs = np.stack([data["img"] for data in data_list], axis=0, dtype=np.float32) + imgs = np.ascontiguousarray(imgs) + output_arrs = self._runtime.infer({input_name: imgs}) + + results = [] + for score_map in output_arrs[0]: + data = {"score_map": score_map} + result = self._postprocessor.run(data) + results.append(result) + return results + + def _update_option(self): + self._option.set_model_path( + self._model_file, self._params_file, self._model_format + ) + + +class _PyOnlyAnomalyDetectionPreprocessor(object): + def __init__(self, config): + super().__init__() + processors = self._build_processors(config) + processors.append(P.ToCHWImage()) + self._processor_chain = PyOnlyProcessorChain(processors) + + def run(self, data): + return self._processor_chain(data) + + def _build_processors(self, config): + processors = [] + for item in config: + tf_type = item["type"] + args = {k: v for k, v in item.items() if k != "type"} + if tf_type == "Resize": + if args.keys() - { + "target_size", + "keep_ratio", + "size_divisor", + "interp", + }: + raise ValueError + args.setdefault("keep_ratio", False) + args.setdefault("size_divisor", None) + args.setdefault("interp", "LINEAR") + processor = P.Resize( + target_size=args["target_size"], + keep_ratio=args["keep_ratio"], + size_divisor=args["size_divisor"], + interp=args["interp"], + ) + elif tf_type == "ResizeByLong": + if args.keys() - {"long_size"}: + raise ValueError + args.setdefault("size_divisor", None) + args.setdefault("interp", "LINEAR") + processor = P.ResizeByLong(target_long_edge=args["long_size"]) + elif tf_type == "ResizeByShort": + if args.keys() - {"short_size"}: + raise ValueError + processor = P.ResizeByShort(target_short_edge=args["short_size"]) + elif tf_type == "Normalize": + if args.keys() - {"mean", "std"}: + raise ValueError + args.setdefault("mean", 0.5) + args.setdefault("std", 0.5) + processor = P.Normalize(mean=args["mean"], std=args["std"]) + else: + raise ValueError("Unknown transform type") + processors.append(processor) + return processors + + +class _PyOnlyAnomalyDetectionPostprocessor(object): + def run(self, data): + score_map = data["score_map"] + + thred = 0.01 + mask = score_map[0] + mask[mask > thred] = 255 + mask[mask <= thred] = 0 + kernel = morphology.disk(4) + mask = morphology.opening(mask, kernel) + mask = mask.astype(np.uint8) + + result = _PyOnlyAnomalyDetectionResult( + label_map=mask.reshape((-1)).tolist(), shape=list(mask.shape) + ) + return result + + +@dataclass +class _PyOnlyAnomalyDetectionResult(object): + label_map: List[int] + shape: List[int] diff --git a/libs/ultrainfer/python/ultrainfer/vision/sr/__init__.py 
b/libs/ultrainfer/python/ultrainfer/vision/sr/__init__.py new file mode 100755 index 0000000000..39cd8d3776 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/sr/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from .ppsr import PPMSVSR, EDVR, BasicVSR diff --git a/libs/ultrainfer/python/ultrainfer/vision/sr/ppsr/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/sr/ppsr/__init__.py new file mode 100755 index 0000000000..23474e57e0 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/sr/ppsr/__init__.py @@ -0,0 +1,122 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class PPMSVSR(UltraInferModel): + def __init__( + self, + model_file, + params_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a VSR model exported by PaddleGAN. + + :param model_file: (str)Path of model file, e.g PPMSVSR/inference.pdmodel + :param params_file: (str)Path of parameters file, e.g PPMSVSR/inference.pdiparams + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPMSVSR, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPMSVSR model only support model format of ModelFormat.Paddle now." + self._model = C.vision.sr.PPMSVSR( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "PPMSVSR model initialize failed." + + def predict(self, input_images): + """Predict the super resolution frame sequences for an input frame sequences + + :param input_images: list[numpy.ndarray] The input image data, 3-D array with layout HWC, BGR format + :return: list[numpy.ndarray] + """ + assert input_images is not None, "The input image data is None." + return self._model.predict(input_images) + + +class EDVR(PPMSVSR): + def __init__( + self, + model_file, + params_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a EDVR model exported by PaddleGAN. 
+ + :param model_file: (str)Path of model file, e.g EDVR/inference.pdmodel + :param params_file: (str)Path of parameters file, e.g EDVR/inference.pdiparams + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPMSVSR, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "EDVR model only support model format of ModelFormat.Paddle now." + self._model = C.vision.sr.EDVR( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "EDVR model initialize failed." + + def predict(self, input_images): + """Predict the super resolution frame sequences for an input frame sequences + + :param input_images: list[numpy.ndarray] The input image data, 3-D array with layout HWC, BGR format + :return: list[numpy.ndarray] + """ + assert input_images is not None, "The input image data is None." + return self._model.predict(input_images) + + +class BasicVSR(PPMSVSR): + def __init__( + self, + model_file, + params_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a EDVR model exported by PaddleGAN. + + :param model_file: (str)Path of model file, e.g BasicVSR/inference.pdmodel + :param params_file: (str)Path of parameters file, e.g BasicVSR/inference.pdiparams + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPMSVSR, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "BasicVSR model only support model format of ModelFormat.Paddle now." + self._model = C.vision.sr.BasicVSR( + model_file, params_file, self._runtime_option, model_format + ) + assert self.initialized, "BasicVSR model initialize failed." + + def predict(self, input_images): + """Predict the super resolution frame sequences for an input frame sequences + + :param input_images: list[numpy.ndarray] The input image data, 3-D array with layout HWC, BGR format + :return: list[numpy.ndarray] + """ + assert input_images is not None, "The input image data is None." + return self._model.predict(input_images) diff --git a/libs/ultrainfer/python/ultrainfer/vision/tracking/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/tracking/__init__.py new file mode 100755 index 0000000000..3ebcf61aeb --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/tracking/__init__.py @@ -0,0 +1,21 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from ... 
import c_lib_wrap as C +from .pptracking import PPTracking + +try: + TrailRecorder = C.vision.tracking.TrailRecorder +except: + pass diff --git a/libs/ultrainfer/python/ultrainfer/vision/tracking/pptracking/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/tracking/pptracking/__init__.py new file mode 100755 index 0000000000..9724d4cb84 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/tracking/pptracking/__init__.py @@ -0,0 +1,69 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .... import UltraInferModel, ModelFormat +from .... import c_lib_wrap as C + + +class PPTracking(UltraInferModel): + def __init__( + self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE, + ): + """Load a PPTracking model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g pptracking/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ppyoloe/model.pdiparams + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (ultrainfer.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (ultrainfer.ModelForamt)Model format of the loaded model + """ + super(PPTracking, self).__init__(runtime_option) + + assert ( + model_format == ModelFormat.PADDLE + ), "PPTracking model only support model format of ModelFormat.Paddle now." + self._model = C.vision.tracking.PPTracking( + model_file, params_file, config_file, self._runtime_option, model_format + ) + assert self.initialized, "PPTracking model initialize failed." + + def predict(self, input_image): + """Predict the MOT result for an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: MOTResult + """ + assert input_image is not None, "The input image data is None." + return self._model.predict(input_image) + + def bind_recorder(self, val): + """Binding tracking trail + + :param val: (TrailRecorder) trail recorder, which is contained object's id and center point sequence + :return: None + """ + self._model.bind_recorder(val) + + def unbind_recorder(self): + """cancel binding of tracking trail + + :return: + """ + self._model.unbind_recorder() diff --git a/libs/ultrainfer/python/ultrainfer/vision/utils.py b/libs/ultrainfer/python/ultrainfer/vision/utils.py new file mode 100755 index 0000000000..b33e443743 --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/utils.py @@ -0,0 +1,290 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
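
A sketch of driving the PPTracking wrapper above frame by frame and binding a TrailRecorder so the C++ side accumulates per-object center trails; the model directory and video path are placeholders, and the no-argument TrailRecorder constructor is an assumption based on the guarded export in tracking/__init__.py.

# Hedged frame-by-frame PPTracking sketch (paths and constructor are assumptions).
import cv2
import ultrainfer as ui

tracker = ui.vision.tracking.PPTracking(
    "pptracking/model.pdmodel",
    "pptracking/model.pdiparams",
    "pptracking/infer_cfg.yml",
)

recorder = ui.vision.tracking.TrailRecorder()  # only present if the native export succeeded
tracker.bind_recorder(recorder)

cap = cv2.VideoCapture("street.mp4")           # assumed input video
while True:
    ok, frame = cap.read()
    if not ok:
        break
    mot_result = tracker.predict(frame)        # MOTResult: boxes, ids, scores, class_ids
cap.release()
tracker.unbind_recorder()
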
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +import json +from .. import c_lib_wrap as C + + +def mask_to_json(result): + r_json = { + "data": result.data, + "shape": result.shape, + } + return json.dumps(r_json) + + +def detection_to_json(result): + masks = [] + for mask in result.masks: + masks.append(mask_to_json(mask)) + r_json = { + "boxes": result.boxes, + "scores": result.scores, + "label_ids": result.label_ids, + "masks": masks, + "contain_masks": result.contain_masks, + } + return json.dumps(r_json) + + +def perception_to_json(result): + r_json = { + "scores": result.scores, + "label_ids": result.label_ids, + "boxes": result.boxes, + "center": result.center, + "observation_angle": result.observation_angle, + "yaw_angle": result.yaw_angle, + "velocity": result.velocity, + } + return json.dumps(r_json) + + +def classify_to_json(result): + r_json = { + "label_ids": result.label_ids, + "scores": result.scores, + } + return json.dumps(r_json) + + +def keypoint_to_json(result): + r_json = { + "keypoints": result.keypoints, + "scores": result.scores, + "num_joints": result.num_joints, + } + return json.dumps(r_json) + + +def ocr_to_json(result): + r_json = { + "boxes": result.boxes, + "text": result.text, + "rec_scores": result.rec_scores, + "cls_scores": result.cls_scores, + "cls_labels": result.cls_labels, + } + return json.dumps(r_json) + + +def mot_to_json(result): + r_json = { + "boxes": result.boxes, + "ids": result.ids, + "scores": result.scores, + "class_ids": result.class_ids, + } + return json.dumps(r_json) + + +def face_detection_to_json(result): + r_json = { + "boxes": result.boxes, + "landmarks": result.landmarks, + "scores": result.scores, + "landmarks_per_face": result.landmarks_per_face, + } + return json.dumps(r_json) + + +def face_alignment_to_json(result): + r_json = { + "landmarks": result.landmarks, + } + return json.dumps(r_json) + + +def face_recognition_to_json(result): + r_json = { + "embedding": result.embedding, + } + return json.dumps(r_json) + + +def segmentation_to_json(result): + r_json = { + "label_map": result.label_map, + "score_map": result.score_map, + "shape": result.shape, + "contain_score_map": result.contain_score_map, + } + return json.dumps(r_json) + + +def matting_to_json(result): + r_json = { + "alpha": result.alpha, + "foreground": result.foreground, + "shape": result.shape, + "contain_foreground": result.contain_foreground, + } + return json.dumps(r_json) + + +def head_pose_to_json(result): + r_json = { + "euler_angles": result.euler_angles, + } + return json.dumps(r_json) + + +def fd_result_to_json(result): + if isinstance(result, list): + r_list = [] + for r in result: + r_list.append(fd_result_to_json(r)) + return r_list + elif isinstance(result, C.vision.DetectionResult): + return detection_to_json(result) + elif isinstance(result, C.vision.Mask): + return mask_to_json(result) + elif isinstance(result, C.vision.ClassifyResult): + return classify_to_json(result) + elif isinstance(result, C.vision.KeyPointDetectionResult): + return keypoint_to_json(result) + elif isinstance(result, C.vision.OCRResult): + return ocr_to_json(result) + 
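
The *_to_json helpers above flatten each C++ result object into a JSON string, and the json_to_* functions later in this module rebuild the objects. A small round-trip sketch for a detection result, assuming it was produced by any model that returns a DetectionResult:

# Hedged round-trip sketch using the helpers defined in this module.
import json
from ultrainfer.vision import utils as vision_utils

def save_detection(det_result, path):
    # detection_to_json returns a JSON string; nested masks are themselves JSON strings.
    with open(path, "w") as f:
        f.write(vision_utils.detection_to_json(det_result))

def load_detection(path):
    with open(path) as f:
        data = json.load(f)
    # json_to_detection (defined further down) rebuilds a C.vision.DetectionResult.
    return vision_utils.json_to_detection(data)
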
elif isinstance(result, C.vision.MOTResult): + return mot_to_json(result) + elif isinstance(result, C.vision.FaceDetectionResult): + return face_detection_to_json(result) + elif isinstance(result, C.vision.FaceAlignmentResult): + return face_alignment_to_json(result) + elif isinstance(result, C.vision.FaceRecognitionResult): + return face_recognition_to_json(result) + elif isinstance(result, C.vision.SegmentationResult): + return segmentation_to_json(result) + elif isinstance(result, C.vision.MattingResult): + return matting_to_json(result) + elif isinstance(result, C.vision.HeadPoseResult): + return head_pose_to_json(result) + elif isinstance(result, C.vision.PerceptionResult): + return perception_to_json(result) + else: + assert False, "{} Conversion to JSON format is not supported".format( + type(result) + ) + return {} + + +def json_to_mask(result): + mask = C.vision.Mask() + mask.data = result["data"] + mask.shape = result["shape"] + return mask + + +def json_to_detection(result): + masks = [] + for mask in result["masks"]: + masks.append(json_to_mask(json.loads(mask))) + det_result = C.vision.DetectionResult() + det_result.boxes = result["boxes"] + det_result.scores = result["scores"] + det_result.label_ids = result["label_ids"] + det_result.masks = masks + det_result.contain_masks = result["contain_masks"] + return det_result + + +def json_to_perception(result): + perception_result = C.vision.PerceptionResult() + perception_result.scores = result["scores"] + perception_result.label_ids = result["label_ids"] + perception_result.boxes = result["boxes"] + perception_result.center = result["center"] + perception_result.observation_angle = result["observation_angle"] + perception_result.yaw_angle = result["yaw_angle"] + perception_result.velocity = result["velocity"] + return perception_result + + +def json_to_classify(result): + cls_result = C.vision.ClassifyResult() + cls_result.label_ids = result["label_ids"] + cls_result.scores = result["scores"] + return cls_result + + +def json_to_keypoint(result): + kp_result = C.vision.KeyPointDetectionResult() + kp_result.keypoints = result["keypoints"] + kp_result.scores = result["scores"] + kp_result.num_joints = result["num_joints"] + return kp_result + + +def json_to_ocr(result): + ocr_result = C.vision.OCRResult() + ocr_result.boxes = result["boxes"] + ocr_result.text = result["text"] + ocr_result.rec_scores = result["rec_scores"] + ocr_result.cls_scores = result["cls_scores"] + ocr_result.cls_labels = result["cls_labels"] + return ocr_result + + +def json_to_mot(result): + mot_result = C.vision.MOTResult() + mot_result.boxes = result["boxes"] + mot_result.ids = result["ids"] + mot_result.scores = result["scores"] + mot_result.class_ids = result["class_ids"] + return mot_result + + +def json_to_face_detection(result): + face_result = C.vision.FaceDetectionResult() + face_result.boxes = result["boxes"] + face_result.landmarks = result["landmarks"] + face_result.scores = result["scores"] + face_result.landmarks_per_face = result["landmarks_per_face"] + return face_result + + +def json_to_face_alignment(result): + face_result = C.vision.FaceAlignmentResult() + face_result.landmarks = result["landmarks"] + return face_result + + +def json_to_face_recognition(result): + face_result = C.vision.FaceRecognitionResult() + face_result.embedding = result["embedding"] + return face_result + + +def json_to_segmentation(result): + seg_result = C.vision.SegmentationResult() + seg_result.label_map = result["label_map"] + seg_result.score_map = 
result["score_map"] + seg_result.shape = result["shape"] + seg_result.contain_score_map = result["contain_score_map"] + return seg_result + + +def json_to_matting(result): + matting_result = C.vision.MattingResult() + matting_result.alpha = result["alpha"] + matting_result.foreground = result["foreground"] + matting_result.shape = result["shape"] + matting_result.contain_foreground = result["contain_foreground"] + return matting_result + + +def json_to_head_pose(result): + hp_result = C.vision.HeadPoseResult() + hp_result.euler_angles = result["euler_angles"] + return hp_result diff --git a/libs/ultrainfer/python/ultrainfer/vision/visualize/__init__.py b/libs/ultrainfer/python/ultrainfer/vision/visualize/__init__.py new file mode 100755 index 0000000000..15730ddeaa --- /dev/null +++ b/libs/ultrainfer/python/ultrainfer/vision/visualize/__init__.py @@ -0,0 +1,229 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from ... import c_lib_wrap as C +import cv2 + + +def vis_detection( + im_data, + det_result, + labels=[], + score_threshold=0.0, + line_size=1, + font_size=0.5, + font_color=[255, 255, 255], + font_thickness=1, +): + """Show the visualized results for detection models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param det_result: the result produced by model + :param labels: (list of str) the visualized result will show the bounding box contain class label + :param score_threshold: (float) score_threshold threshold for result scores, the bounding box will not be shown if the score is less than score_threshold + :param line_size: (float) line_size line size for bounding boxes + :param font_size: (float) font_size font size for text + :param font_color: (list of int) font_color for text + :param font_thickness: (int) font_thickness for text + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_detection( + im_data, + det_result, + labels, + score_threshold, + line_size, + font_size, + font_color, + font_thickness, + ) + + +def vis_perception( + im_data, det_result, config_file, score_threshold=0.0, line_size=1, font_size=0.5 +): + """Show the visualized results for 3d detection models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param det_result: the result produced by model + :param config_file: the config file for detection and visualization + :param score_threshold: (float) score_threshold threshold for result scores, the bounding box will not be shown if the score is less than score_threshold + :param line_size: (float) line_size line size for bounding boxes + :param font_size: (float) font_size font size for text + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_perception( + im_data, det_result, config_file, score_threshold, line_size, font_size + ) + + +def vis_keypoint_detection(im_data, 
keypoint_det_result, conf_threshold=0.5): + """Show the visualized results for keypoint detection models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param keypoint_det_result: the result produced by model + :param conf_threshold: (float) conf_threshold threshold for result scores, the bounding box will not be shown if the score is less than conf_threshold + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.Visualize.vis_keypoint_detection( + im_data, keypoint_det_result, conf_threshold + ) + + +def vis_face_detection(im_data, face_det_result, line_size=1, font_size=0.5): + """Show the visualized results for face detection models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param face_det_result: the result produced by model + :param line_size: (float) line_size line size for bounding boxes + :param font_size: (float) font_size font size for text + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_face_detection(im_data, face_det_result, line_size, font_size) + + +def vis_face_alignment(im_data, face_align_result, line_size=1): + """Show the visualized results for face alignment models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param face_align_result: the result produced by model + :param line_size: (float)line_size line size for circle point + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_face_alignment(im_data, face_align_result, line_size) + + +def vis_segmentation(im_data, seg_result, weight=0.5): + """Show the visualized results for segmentation models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param seg_result: the result produced by model + :param weight: (float)transparent weight of visualized result image + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_segmentation(im_data, seg_result, weight) + + +def vis_matting_alpha(im_data, matting_result, remove_small_connected_area=False): + logging.warning( + "DEPRECATED: ultrainfer.vision.vis_matting_alpha is deprecated, please use ultrainfer.vision.vis_matting function instead." 
+ ) + return C.vision.vis_matting(im_data, matting_result, remove_small_connected_area) + + +def vis_matting( + im_data, + matting_result, + transparent_background=False, + transparent_threshold=0.99, + remove_small_connected_area=False, +): + """Show the visualized results for matting models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param matting_result: the result produced by model + :param transparent_background: whether visulizing matting result with transparent background + :param transparent_threshold: since the alpha value in MattringResult is a float between [0, 1], transparent_threshold is used to filter background pixel + :param remove_small_connected_area: (bool) if remove_small_connected_area==True, the visualized result will not include the small connected areas + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_matting( + im_data, + matting_result, + transparent_background, + transparent_threshold, + remove_small_connected_area, + ) + + +def swap_background_matting( + im_data, background, result, remove_small_connected_area=False +): + logging.warning( + "DEPRECATED: ultrainfer.vision.swap_background_matting is deprecated, please use ultrainfer.vision.swap_background function instead." + ) + assert isinstance( + result, C.vision.MattingResult + ), "The result must be MattingResult type" + return C.vision.Visualize.swap_background_matting( + im_data, background, result, remove_small_connected_area + ) + + +def swap_background_segmentation(im_data, background, background_label, result): + logging.warning( + "DEPRECATED: ultrainfer.vision.swap_background_segmentation is deprecated, please use ultrainfer.vision.swap_background function instead." + ) + assert isinstance( + result, C.vision.SegmentationResult + ), "The result must be SegmentaitonResult type" + return C.vision.Visualize.swap_background_segmentation( + im_data, background, background_label, result + ) + + +def swap_background( + im_data, background, result, remove_small_connected_area=False, background_label=0 +): + """Swap the image background with MattingResult or SegmentationResult + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param background: (numpy.ndarray)The background image data, 3-D array with layout HWC, BGR format + :param result: The result produced by model, MattingResult or SegmentationResult + :param remove_small_connected_area: (bool) If remove_small_connected_area==True, the visualized result will not include the small connected areas + :param background_label: (int)The background label number in SegmentationResult + :return: (numpy.ndarray) image with visualized results + """ + if isinstance(result, C.vision.MattingResult): + return C.vision.swap_background( + im_data, background, result, remove_small_connected_area + ) + elif isinstance(result, C.vision.SegmentationResult): + return C.vision.swap_background(im_data, background, result, background_label) + else: + raise Exception( + "Only support result type of MattingResult or SegmentationResult, but now the data type is {}.".format( + type(result) + ) + ) + + +def vis_ppocr(im_data, det_result): + """Show the visualized results for ocr models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param det_result: the result produced by model + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_ppocr(im_data, det_result) + + +def 
vis_ppocr_curve(im_data, det_result): + """Show the visualized results for ocr models + + :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :param det_result: the result produced by model + :return: (numpy.ndarray) image with visualized results + """ + return C.vision.vis_ppocr_curve(im_data, det_result) + + +def vis_mot(im_data, mot_result, score_threshold=0.0, records=None): + return C.vision.vis_mot(im_data, mot_result, score_threshold, records) + + +def vis_headpose(im_data, headpose_result, size=50, line_size=1): + return C.vision.vis_headpose(im_data, headpose_result, size, line_size) diff --git a/libs/ultrainfer/scripts/__init__.py b/libs/ultrainfer/scripts/__init__.py new file mode 100755 index 0000000000..59372f9379 --- /dev/null +++ b/libs/ultrainfer/scripts/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/libs/ultrainfer/scripts/ascend_init.sh b/libs/ultrainfer/scripts/ascend_init.sh new file mode 100755 index 0000000000..e88150d3de --- /dev/null +++ b/libs/ultrainfer/scripts/ascend_init.sh @@ -0,0 +1,13 @@ +# Set huawei ascend toolkit correctly. +HUAWEI_ASCEND_TOOLKIT_HOME="/usr/local/Ascend/ascend-toolkit/latest" +HUAWEI_ASCEND_DRIVER_PATH="/usr/local/Ascend/driver" +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HUAWEI_ASCEND_DRIVER_PATH/lib64/driver:$HUAWEI_ASCEND_DRIVER_PATH/lib64:$HUAWEI_ASCEND_DRIVER_PATH/lib64/stub:$HUAWEI_ASCEND_TOOLKIT_HOME/acllib/lib64:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/lib64:$HUAWEI_ASCEND_TOOLKIT_HOME/opp/op_proto/built-in +export PYTHONPATH=$PYTHONPATH:$HUAWEI_ASCEND_TOOLKIT_HOME/fwkacllib/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/acllib/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/toolkit/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/pyACL/python/site-packages/acl +export PATH=$PATH:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/ccec_compiler/bin:${HUAWEI_ASCEND_TOOLKIT_HOME}/acllib/bin:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/bin +export ASCEND_AICPU_PATH=$HUAWEI_ASCEND_TOOLKIT_HOME +export ASCEND_OPP_PATH=$HUAWEI_ASCEND_TOOLKIT_HOME/opp +export TOOLCHAIN_HOME=$HUAWEI_ASCEND_TOOLKIT_HOME/toolkit +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +echo "===== Finish Initializing Environment for Ascend Deployment =====" diff --git a/libs/ultrainfer/scripts/build_bcloud_lib.py b/libs/ultrainfer/scripts/build_bcloud_lib.py new file mode 100755 index 0000000000..e896a3c563 --- /dev/null +++ b/libs/ultrainfer/scripts/build_bcloud_lib.py @@ -0,0 +1,41 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
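
The visualization wrappers above are thin pass-throughs to the C++ drawing routines. A hedged example of overlaying detection results on an image; the image path, the label list, and the use of PP-YOLOE (standing in for any model that returns a DetectionResult) are assumptions for illustration:

# Hedged vis_detection sketch (model choice, paths, and labels are assumptions).
import cv2
import ultrainfer as ui

im = cv2.imread("test.jpg")
detector = ui.vision.detection.PPYOLOE(        # any DetectionResult-producing model works
    "ppyoloe/model.pdmodel", "ppyoloe/model.pdiparams", "ppyoloe/infer_cfg.yml"
)
det_result = detector.predict(im)

vis_im = ui.vision.vis_detection(
    im,
    det_result,
    labels=["person", "car"],                  # hypothetical class names
    score_threshold=0.5,                       # hide low-confidence boxes
    line_size=2,
    font_size=0.6,
)
cv2.imwrite("vis_result.jpg", vis_im)
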
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import os +import shutil + +dirname = sys.argv[1] +bc_dirname = sys.argv[2] + +if os.path.exists(bc_dirname): + raise Exception("Path {} is already exists.".format(bc_dirname)) + +os.makedirs(bc_dirname) + +# copy include files +shutil.copytree(os.path.join(dirname, "include"), os.path.join(bc_dirname, "include")) + +# copy libraries +shutil.copytree(os.path.join(dirname, "lib"), os.path.join(bc_dirname, "lib")) + +third_libs = os.path.join(dirname, "third_libs") + +for root, dirs, files in os.walk(third_libs): + for f in files: + if f.strip().count(".so") > 0 or f.strip() == "plugins.xml": + full_path = os.path.join(root, f) + shutil.copy( + full_path, os.path.join(bc_dirname, "lib"), follow_symlinks=False + ) diff --git a/libs/ultrainfer/scripts/clean_sdk.sh b/libs/ultrainfer/scripts/clean_sdk.sh new file mode 100755 index 0000000000..d01730df0d --- /dev/null +++ b/libs/ultrainfer/scripts/clean_sdk.sh @@ -0,0 +1,7 @@ +origin_install_dir=$1 + +rm -rf $origin_install_dir/include/onnx $origin_install_dir/include/paddle2onnx +mv $origin_install_dir/lib $origin_install_dir/lib_bak +mkdir $origin_install_dir/lib +cp $origin_install_dir/lib_bak/*ultrainfer* $origin_install_dir/lib +rm -rf $origin_install_dir/lib_bak diff --git a/libs/ultrainfer/scripts/copy_directory.py b/libs/ultrainfer/scripts/copy_directory.py new file mode 100755 index 0000000000..f4363ba4a8 --- /dev/null +++ b/libs/ultrainfer/scripts/copy_directory.py @@ -0,0 +1,32 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
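
build_bcloud_lib.py above flattens an installed SDK into a single include/ + lib/ layout, pulling every third-party shared object (plus OpenVINO's plugins.xml) into lib/. The pathlib sketch below restates only that collection step to make the filter explicit; the directory names are assumptions:

# Illustrative restatement of the third_libs collection done by build_bcloud_lib.py.
import shutil
from pathlib import Path

def collect_third_party_libs(sdk_dir, out_lib_dir):
    out_lib_dir = Path(out_lib_dir)
    out_lib_dir.mkdir(parents=True, exist_ok=True)
    for path in Path(sdk_dir, "third_libs").rglob("*"):
        # Same rule as the os.walk loop above: shared objects and plugins.xml only.
        if path.is_file() and (".so" in path.name or path.name == "plugins.xml"):
            shutil.copy(path, out_lib_dir, follow_symlinks=False)

# collect_third_party_libs("ultrainfer_install", "bcloud_pkg/lib")  # assumed paths
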
+ +import shutil +import os +import sys + + +def copy_directory(src, dst): + if os.path.exists(dst): + raise Exception("Destination {} is already exist.".format(dst)) + if not os.path.exists(src): + raise Exception("Source {} is not exist.".format(src)) + try: + shutil.copytree(src, dst, symlinks=True) + except: + raise Exception("Copy {} to {} failed.".format(src, dst)) + + +if __name__ == "__main__": + copy_directory(sys.argv[1], sys.argv[2]) diff --git a/libs/ultrainfer/scripts/linux/_build_cpp.sh b/libs/ultrainfer/scripts/linux/_build_cpp.sh new file mode 100755 index 0000000000..8c5a819cdc --- /dev/null +++ b/libs/ultrainfer/scripts/linux/_build_cpp.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +set -e + +TRT_VERSION='8.5.2.2' +CUDA_VERSION='11.8' +CUDNN_VERSION='8.6' + +# deal cmd input +while [[ "$#" -gt 0 ]]; do + case "$1" in + --with-gpu) WITH_GPU="$2"; shift ;; + --enable-benchmark) ENABLE_BENCHMARK="$2"; shift ;; + --paddleinference-url) PADDLEINFERENCE_URL="$2"; shift ;; + --paddleinference-version) PADDLEINFERENCE_VERSION="$2"; shift ;; + *) echo "Unknown parameter passed: $1"; exit 1 ;; + esac + shift +done + +export DEBIAN_FRONTEND='noninteractive' +export TZ='Asia/Shanghai' +export CC=/usr/local/gcc-8.2/bin/gcc +export CXX=/usr/local/gcc-8.2/bin/g++ + +cd /workspace/ultrainfer + +wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.cloud.tencent.com/repo/centos7_base.repo +yum clean all +yum makecache + +yum install -y patchelf rapidjson-devel + +ln -sf /opt/_internal/cpython-3.10.0/bin/python3.10 /usr/bin/python +ln -sf /opt/_internal/cpython-3.10.0/bin/pip3.10 /usr/bin/pip + +export LD_LIBRARY_PATH=/opt/_internal/cpython-3.10.0/lib:${LD_LIBRARY_PATH} +export PATH=/opt/_internal/cpython-3.10.0/bin:${PATH} + +rm -rf "TensorRT-${TRT_VERSION}" "TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" +http_proxy= https_proxy= wget "https://fastdeploy.bj.bcebos.com/resource/TensorRT/TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" +tar -xzvf "TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" + +( + cd /workspace/ultrainfer + rm -rf build && mkdir build && cd build + unset http_proxy https_proxy + cmake \ + -DLIBRARY_NAME='ultrainfer_runtime' \ + -DCMAKE_INSTALL_PREFIX="${PWD}/ultrainfer_install" \ + -DWITH_GPU="${WITH_GPU}" \ + -DENABLE_TRT_BACKEND="${WITH_GPU}" \ + -DTRT_DIRECTORY="/workspace/ultrainfer/TensorRT-${TRT_VERSION}" \ + -DENABLE_ORT_BACKEND=ON \ + -DENABLE_PADDLE_BACKEND=ON \ + -DPADDLEINFERENCE_URL="${PADDLEINFERENCE_URL}" \ + -DPADDLEINFERENCE_VERSION="${PADDLEINFERENCE_VERSION}" \ + -DENABLE_OPENVINO_BACKEND=ON \ + -DENABLE_VISION=ON \ + -DENABLE_TEXT=ON \ + -DBUILD_ULTRAINFER_PYTHON=OFF \ + -DBUILD_FD_TRITON_BACKEND=ON \ + -DENABLE_BENCHMARK="${ENABLE_BENCHMARK}" \ + .. 
+ make -j"$(nproc)" + make install +) diff --git a/libs/ultrainfer/scripts/linux/_build_py.sh b/libs/ultrainfer/scripts/linux/_build_py.sh new file mode 100755 index 0000000000..de06b7949c --- /dev/null +++ b/libs/ultrainfer/scripts/linux/_build_py.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +set -e + +TRT_VERSION='8.5.2.2' +CUDA_VERSION='11.8' +CUDNN_VERSION='8.6' + +while [[ "$#" -gt 0 ]]; do + case "$1" in + --with-gpu) WITH_GPU="$2"; shift ;; + --enable-benchmark) ENABLE_BENCHMARK="$2"; shift ;; + --python) PYTHON_VERSION="$2"; shift ;; + --paddleinference-url) PADDLEINFERENCE_URL="$2"; shift ;; + --paddleinference-version) PADDLEINFERENCE_VERSION="$2"; shift ;; + *) echo "Unknown parameter passed: $1"; exit 1 ;; + esac + shift +done + +export DEBIAN_FRONTEND='noninteractive' +export TZ='Asia/Shanghai' + +cd /workspace + +wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.cloud.tencent.com/repo/centos7_base.repo +yum clean all +yum makecache + +yum --disablerepo=epel -y update ca-certificates +yum install -y wget bzip2 +yum install -y epel-release +yum install -y patchelf rapidjson-devel + +PYTHON_DIR="/opt/_internal/cpython-${PYTHON_VERSION}" +if [ -d "$PYTHON_DIR" ]; then + ln -sf "${PYTHON_DIR}/bin/python${PYTHON_VERSION}" /usr/bin/python + ln -sf "${PYTHON_DIR}/bin/pip${PYTHON_VERSION}" /usr/bin/pip + export LD_LIBRARY_PATH="${PYTHON_DIR}/lib:${LD_LIBRARY_PATH}" + export PATH="${PYTHON_DIR}/bin:${PATH}" +else + echo "Python version ${PYTHON_VERSION} not found in ${PYTHON_DIR}." + exit 1 +fi + +python -m pip install numpy pandas + +cd /workspace/ultrainfer + +rm -rf "TensorRT-${TRT_VERSION}" "TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" +http_proxy= https_proxy= wget "https://fastdeploy.bj.bcebos.com/resource/TensorRT/TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" +tar -xzvf "TensorRT-${TRT_VERSION}.Linux.x86_64-gnu.cuda-${CUDA_VERSION}.cudnn${CUDNN_VERSION}.tar.gz" + +export WITH_GPU="${WITH_GPU}" +export ENABLE_TRT_BACKEND="${WITH_GPU}" +export TRT_DIRECTORY="/workspace/ultrainfer/TensorRT-${TRT_VERSION}" +export ENABLE_ORT_BACKEND=ON +export ENABLE_PADDLE_BACKEND=ON +export PADDLEINFERENCE_URL="${PADDLEINFERENCE_URL}" +export PADDLEINFERENCE_VERSION="${PADDLEINFERENCE_VERSION}" +export ENABLE_OPENVINO_BACKEND=ON +export ENABLE_VISION=ON +export ENABLE_TEXT=ON +export ENABLE_BENCHMARK="${ENABLE_BENCHMARK}" +export CC=/usr/local/gcc-8.2/bin/gcc +export CXX=/usr/local/gcc-8.2/bin/g++ + +cd /workspace/ultrainfer/python +python -m pip install wheel +unset http_proxy https_proxy + +rm -rf .setuptools-cmake-build build ultrainfer/libs/third_libs dist +python setup.py build +# HACK +patchelf \ + --set-rpath 
'$ORIGIN/libs/third_libs/onnxruntime/lib:$ORIGIN/libs/third_libs/paddle2onnx/lib:$ORIGIN/libs/third_libs/paddle_inference/paddle/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/cryptopp/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/mklml/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/glog/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/protobuf/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/utf8proc/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/xxhash/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/gflags/lib:$ORIGIN/libs/third_libs/paddle_inference/third_party/install/onednn/lib:$ORIGIN/libs/third_libs/tensorrt/lib:$ORIGIN/libs/third_libs/opencv/lib64:$ORIGIN/libs/third_libs/openvino/runtime/lib:$ORIGIN/libs/third_libs/openvino/runtime/3rdparty/omp/lib' \ + build/lib.*/ultrainfer/ultrainfer_main*.so +python setup.py bdist_wheel diff --git a/libs/ultrainfer/scripts/linux/set_up_docker_and_build_cpp.sh b/libs/ultrainfer/scripts/linux/set_up_docker_and_build_cpp.sh new file mode 100755 index 0000000000..e091e8aab4 --- /dev/null +++ b/libs/ultrainfer/scripts/linux/set_up_docker_and_build_cpp.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# input +CONTAINER_NAME="${CONTAINER_NAME:-build_fd}" +WITH_GPU="${WITH_GPU:-ON}" +ENABLE_BENCHMARK="${ENABLE_BENCHMARK:-OFF}" +DEBUG="${DEBUG:-OFF}" + +DOCKER_IMAGE="ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle_manylinux_devel:cuda11.8-cudnn8.6-trt8.5-gcc8.2" + +if [[ -z "$PADDLEINFERENCE_URL" ]]; then + echo "Error: PADDLEINFERENCE_URL is not set." + exit 1 +fi + +if [[ -z "$PADDLEINFERENCE_VERSION" ]]; then + echo "Error: PADDLEINFERENCE_VERSION is not set." + exit 1 +fi + +# Set variables +CMAKE_CXX_COMPILER="/usr/local/gcc-8.2/bin/g++" + +# Get the current script directory and compute the directory to mount +SCRIPT_DIR="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" +ULTRAINFER_DIR="$(realpath "$SCRIPT_DIR/../../../")" + +# Set the Docker startup command +if [ "$WITH_GPU" = "ON" ]; then + DOCKER_CMD=$(cat << EOF +docker run --gpus all -it --name="${CONTAINER_NAME}" --shm-size=128g --net=host \ +-v "${ULTRAINFER_DIR}":/workspace \ +-e CMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}" \ +-e "http_proxy=${http_proxy}" \ +-e "https_proxy=${https_proxy}" \ +"${DOCKER_IMAGE}" /bin/bash -c " +ldconfig && \ +cd /workspace && \ +./ultrainfer/scripts/linux/_build_cpp.sh --with-gpu "${WITH_GPU}" --enable-benchmark "${ENABLE_BENCHMARK}" --paddleinference-url "${PADDLEINFERENCE_URL}" --paddleinference-version "${PADDLEINFERENCE_VERSION}" && \ +tail -f /dev/null" +EOF +) +else + DOCKER_CMD=$(cat << EOF +docker run -it --name="${CONTAINER_NAME}" --shm-size=128g --net=host \ +-v "${ULTRAINFER_DIR}":/workspace \ +-e CMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}" \ +-e "http_proxy=${http_proxy}" \ +-e "https_proxy=${https_proxy}" \ +"${DOCKER_IMAGE}" /bin/bash -c " +cd /workspace && \ +./ultrainfer/scripts/linux/_build_cpp.sh --with-gpu "${WITH_GPU}" --enable-benchmark "${ENABLE_BENCHMARK}" --paddleinference-url "${PADDLEINFERENCE_URL}" --paddleinference-version "${PADDLEINFERENCE_VERSION}" && \ +tail -f /dev/null" +EOF +) +fi + +# If in debug mode, replace --rm with -it and keep the container running +if [ "$DEBUG" = "OFF" ]; then + DOCKER_CMD="${DOCKER_CMD/-it/--rm}" + DOCKER_CMD="${DOCKER_CMD/ && tail -f \/dev\/null/}" +fi + +# Check if a Docker container with the same name already exists +if docker ps -a --format '{{.Names}}' | grep -Eq 
"^${CONTAINER_NAME}\$"; then + echo "Error: A Docker container with the name '${CONTAINER_NAME}' already exists." + echo "Please remove the existing container or choose a different container name." + exit 1 +fi + +echo "Starting Docker container..." +eval "$DOCKER_CMD" diff --git a/libs/ultrainfer/scripts/linux/set_up_docker_and_build_py.sh b/libs/ultrainfer/scripts/linux/set_up_docker_and_build_py.sh new file mode 100755 index 0000000000..bec7f6028e --- /dev/null +++ b/libs/ultrainfer/scripts/linux/set_up_docker_and_build_py.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +# input +CONTAINER_NAME="${CONTAINER_NAME:-build_fd}" +WITH_GPU="${WITH_GPU:-ON}" +ENABLE_BENCHMARK="${ENABLE_BENCHMARK:-OFF}" +DEBUG="${DEBUG:-OFF}" +PYTHON_VERSION="${PYTHON_VERSION:-3.10.0}" + +DOCKER_IMAGE="ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle_manylinux_devel:cuda11.8-cudnn8.6-trt8.5-gcc8.2" + +if [[ -z "$PADDLEINFERENCE_URL" ]]; then + echo "Error: PADDLEINFERENCE_URL is not set." + exit 1 +fi + +if [[ -z "$PADDLEINFERENCE_VERSION" ]]; then + echo "Error: PADDLEINFERENCE_VERSION is not set." + exit 1 +fi + +# Set variables +CMAKE_CXX_COMPILER="/usr/local/gcc-8.2/bin/g++" + +# Get the current script directory and compute the directory to mount +SCRIPT_DIR="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" +ULTRAINFER_DIR="$(realpath "$SCRIPT_DIR/../../../")" + +# Set the Docker startup command +if [ "$WITH_GPU" = "ON" ]; then + DOCKER_CMD=$(cat << EOF +docker run --gpus all -it --name="${CONTAINER_NAME}" --shm-size=128g --net=host \ +-v "${ULTRAINFER_DIR}":/workspace \ +-e CMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}" \ +-e "http_proxy=${http_proxy}" \ +-e "https_proxy=${https_proxy}" \ +"${DOCKER_IMAGE}" /bin/bash -c " +cd /workspace && \ +ldconfig && \ +./ultrainfer/scripts/linux/_build_py.sh --with-gpu "${WITH_GPU}" --enable-benchmark "${ENABLE_BENCHMARK}" --python "${PYTHON_VERSION}" --paddleinference-url "${PADDLEINFERENCE_URL}" --paddleinference-version "${PADDLEINFERENCE_VERSION}" && \ +tail -f /dev/null" +EOF +) +else + DOCKER_CMD=$(cat << EOF +docker run -it --name="${CONTAINER_NAME}" --shm-size=128g --net=host \ +-v "${ULTRAINFER_DIR}":/workspace \ +-e CMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}" \ +-e "http_proxy=${http_proxy}" \ +-e "https_proxy=${https_proxy}" \ +"${DOCKER_IMAGE}" /bin/bash -c " +cd /workspace && \ +./ultrainfer/scripts/linux/_build_py.sh --with-gpu "${WITH_GPU}" --enable-benchmark "${ENABLE_BENCHMARK}" --python "${PYTHON_VERSION}" --paddleinference-url "${PADDLEINFERENCE_URL}" --paddleinference-version "${PADDLEINFERENCE_VERSION}" && \ +tail -f /dev/null" +EOF +) +fi + +# If in debug mode, replace --rm with -it and keep the container running +if [ "$DEBUG" = "OFF" ]; then + DOCKER_CMD="${DOCKER_CMD/-it/--rm}" + DOCKER_CMD="${DOCKER_CMD/ && tail -f \/dev\/null/}" +fi + +# Check if a Docker container with the same name already exists +if docker ps -a --format '{{.Names}}' | grep -Eq "^${CONTAINER_NAME}\$"; then + echo "Error: A Docker container with the name '${CONTAINER_NAME}' already exists." + echo "Please remove the existing container or choose a different container name." + exit 1 +fi + +echo "Starting Docker container..." +eval "$DOCKER_CMD" diff --git a/libs/ultrainfer/scripts/patch_lib.sh b/libs/ultrainfer/scripts/patch_lib.sh new file mode 100755 index 0000000000..952c838d8e --- /dev/null +++ b/libs/ultrainfer/scripts/patch_lib.sh @@ -0,0 +1,15 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +patchelf --set-rpath '${ORIGIN}' $1/*.so* diff --git a/libs/ultrainfer/scripts/patch_paddle_inference.py b/libs/ultrainfer/scripts/patch_paddle_inference.py new file mode 100755 index 0000000000..1fae19e707 --- /dev/null +++ b/libs/ultrainfer/scripts/patch_paddle_inference.py @@ -0,0 +1,52 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import shutil +import subprocess +import platform +import sys + + +def process_paddle_inference(paddle_inference_so_file, paddle_inference_version): + if platform.system().lower() != "linux": + return + rpaths = [ + "$ORIGIN", + "$ORIGIN/../../third_party/install/mklml/lib/", + "$ORIGIN/../../third_party/install/xpu/lib/", + "$ORIGIN/../../third_party/install/fdmodel/lib/", + "$ORIGIN/../../../tensorrt/lib/", + ] + version_major = int(paddle_inference_version.split(".")[0]) + if paddle_inference_version != "0.0.0" and version_major < 2: + raise ValueError("Invalid Paddle Inference version") + if version_major == 2: + rpaths.append("$ORIGIN/../../third_party/install/mkldnn/lib/") + else: + rpaths.append("$ORIGIN/../../third_party/install/onednn/lib/") + + patchelf_exe = os.getenv("PATCHELF_EXE", "patchelf") + command = "{} --force-rpath --set-rpath '{}' {}".format( + patchelf_exe, ":".join(rpaths), paddle_inference_so_file + ) + if platform.machine() != "sw_64" and platform.machine() != "mips64": + assert os.system(command) == 0, "patchelf {} failed, the command: {}".format( + paddle_inference_so_file, command + ) + + +if __name__ == "__main__": + process_paddle_inference(sys.argv[1], sys.argv[2]) diff --git a/libs/ultrainfer/scripts/patch_paddle_lite.py b/libs/ultrainfer/scripts/patch_paddle_lite.py new file mode 100755 index 0000000000..39ad9abcf0 --- /dev/null +++ b/libs/ultrainfer/scripts/patch_paddle_lite.py @@ -0,0 +1,44 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
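
patch_paddle_inference.py above switches one RPATH entry on the Paddle Inference major version: 2.x builds keep the oneDNN libraries under third_party/install/mkldnn, while 0.0.0 (nightly) and newer major versions use the onednn directory. The snippet below only prints the command the script would assemble for a hypothetical 2.x library, to make the resulting RPATH visible; the .so path is a placeholder:

# Illustration of the patchelf command assembled for a hypothetical 2.x Paddle Inference build.
rpaths = [
    "$ORIGIN",
    "$ORIGIN/../../third_party/install/mklml/lib/",
    "$ORIGIN/../../third_party/install/xpu/lib/",
    "$ORIGIN/../../third_party/install/fdmodel/lib/",
    "$ORIGIN/../../../tensorrt/lib/",
    "$ORIGIN/../../third_party/install/mkldnn/lib/",  # onednn/ on 0.0.0 and 3.x builds
]
print(
    "patchelf --force-rpath --set-rpath '{}' {}".format(
        ":".join(rpaths), "paddle_inference/paddle/lib/libpaddle_inference.so"
    )
)
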
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import shutil +import subprocess +import platform +import sys + + +def process_paddle_lite(paddle_lite_so_path): + if platform.system().lower() != "linux": + return + rpaths = ["$ORIGIN", "$ORIGIN/mklml/lib/"] + patchelf_exe = os.getenv("PATCHELF_EXE", "patchelf") + for root, dirs, files in os.walk(paddle_lite_so_path): + for lib in files: + if ".so" in lib: + paddle_lite_so_file = os.path.join(root, lib) + command = "{} --set-rpath '{}' {}".format( + patchelf_exe, ":".join(rpaths), paddle_lite_so_file + ) + if platform.machine() != "sw_64" and platform.machine() != "mips64": + assert ( + os.system(command) == 0 + ), "patchelf {} failed, the command: {}".format( + paddle_lite_so_file, command + ) + + +if __name__ == "__main__": + process_paddle_lite(sys.argv[1]) diff --git a/libs/ultrainfer/scripts/ultrainfer_init.bat b/libs/ultrainfer/scripts/ultrainfer_init.bat new file mode 100755 index 0000000000..845070b201 --- /dev/null +++ b/libs/ultrainfer/scripts/ultrainfer_init.bat @@ -0,0 +1,167 @@ +@echo off + +set __script_action_type=%1 +set __ultrainfer_sdk_dir=%2 +set __another_target_dir=%3 +set __install_infos_flag=%4 + +@rem help +if "%__script_action_type%" == "help" ( + call:__print_long_line + echo [1] [help] print help information: ultrainfer_init.bat help + echo [2] [show] show all dlls/libs/include paths: ultrainfer_init.bat show ultrainfer-sdk-dir + echo [3] [init] init all dlls paths for current terminal: ultrainfer_init.bat init ultrainfer-sdk-dir [WARNING: need copy onnxruntime.dll manually] + echo [4] [setup] setup path env for current terminal: ultrainfer_init.bat setup ultrainfer-sdk-dir [WARNING: need copy onnxruntime.dll manually] + echo [5] [install] install all dlls to a specific dir: ultrainfer_init.bat install ultrainfer-sdk-dir another-dir-to-install-dlls **[RECOMMEND]** + echo [6] [install] install all dlls with logging infos: ultrainfer_init.bat install ultrainfer-sdk-dir another-dir-to-install-dlls info + call:__print_long_line + goto:eof +) + +@rem show dlls and libs +if "%__script_action_type%" == "show" ( + + call:__print_long_line + echo [SDK] %__ultrainfer_sdk_dir% + + call:__print_long_line + set __have_openvino_flag=false + set __ultrainfer_lib_dir=%__ultrainfer_sdk_dir%\lib + + @setlocal enabledelayedexpansion + echo [DLL] !__ultrainfer_lib_dir!\ultrainfer.dll **[NEEDED]** + for /f "delims= " %%a in ('dir /s /b %__ultrainfer_sdk_dir%\third_libs ^| findstr /e \.dll ^| findstr /v "vc14\\bin\\opencv"') do ( + set __3rd_dll_file=%%a && set __3rd_needed_flag=true + echo !__3rd_dll_file! | findstr "openvino">nul && set __have_openvino_flag=true + echo !__3rd_dll_file! | findstr d\.dll>nul && set __3rd_needed_flag=false + if "!__3rd_needed_flag!"=="false" (echo !__3rd_dll_file! | findstr /v opencv>nul && set __3rd_needed_flag=true) + echo !__3rd_dll_file! | findstr debug\.dll>nul && set __3rd_needed_flag=false + if "!__3rd_needed_flag!"=="true" (echo [DLL] !__3rd_dll_file! **[NEEDED]**) else (echo [DLL] !__3rd_dll_file!) + ) + + call:__print_long_line + echo [Lib] !__ultrainfer_lib_dir!\ultrainfer.lib **[NEEDED][ultrainfer]** + for /f "delims= " %%a in ('dir /s /b %__ultrainfer_sdk_dir%\third_libs ^| findstr /e \.lib ^| findstr /v "vc14\\lib\\opencv"') do ( + set __3rd_lib_file=%%a && set __3rd_needed_flag=false && set __api_tag=ultrainfer + echo !__3rd_lib_file! 
| findstr "opencv">nul && set __3rd_needed_flag=true + echo !__3rd_lib_file! | findstr "opencv">nul && set __api_tag=!__api_tag!::vision + if "!__3rd_needed_flag!"=="true" (echo !__3rd_lib_file! | findstr d\.lib>nul && set __3rd_needed_flag=false) + echo !__3rd_lib_file! | findstr "fast_tokenizer">nul && set __3rd_needed_flag=true + echo !__3rd_lib_file! | findstr "fast_tokenizer">nul && set __api_tag=!__api_tag!::text + if "!__3rd_needed_flag!"=="true" (echo [Lib] !__3rd_lib_file! **[NEEDED][!__api_tag!]**) else (echo [Lib] !__3rd_lib_file!) + ) + + call:__print_long_line + set __ultrainfer_include_dir=%__ultrainfer_sdk_dir%\include + echo [Include] !__ultrainfer_include_dir! **[NEEDED][ultrainfer]** + for /f "delims= " %%a in ('dir /s /b %__ultrainfer_sdk_dir%\third_libs ^| findstr /e include ^| findstr /v "vc14\\bin\\opencv"') do ( + set __3rd_include_dir=%%a && set __3rd_needed_flag=false && set __api_tag=ultrainfer + echo !__3rd_include_dir! | findstr "opencv">nul && set __3rd_needed_flag=true + echo !__3rd_include_dir! | findstr "opencv">nul && set __api_tag=!__api_tag!::vision + echo !__3rd_include_dir! | findstr "fast_tokenizer">nul && set __3rd_needed_flag=true + echo !__3rd_include_dir! | findstr "fast_tokenizer">nul && set __api_tag=!__api_tag!::text + if "!__3rd_needed_flag!"=="true" (echo [Include] !__3rd_include_dir! **[NEEDED][!__api_tag!]**) else (echo [Include] !__3rd_include_dir!) + ) + + call:__print_long_line + if "!__have_openvino_flag!"=="true" ( + for /f "delims= " %%a in ('dir /s /b %__ultrainfer_sdk_dir%\third_libs ^| findstr /e \.xml ^| findstr "openvino"') do ( + set __openvino_plugin_xml=%%a + echo [XML] !__openvino_plugin_xml! **[NEEDED]** + ) + call:__print_long_line + ) + @setlocal disabledelayedexpansion + goto:eof +) + +@rem init all paths for dlls +if "%__script_action_type%" == "init" ( + @setlocal enabledelayedexpansion + set /p yes_or_no=Init dll paths for UltraInfer in current terminal: [y/n] + if "!yes_or_no!"=="y" (echo YES.) else (echo NO. && pause && goto:eof) + @setlocal disabledelayedexpansion + if exist bin.txt (del /Q bin.txt) + if exist lib.txt (del /Q lib.txt) + for /f "delims= " %%a in ('dir /s /b /A:D %__ultrainfer_sdk_dir% ^| findstr /v include ^| findstr /e bin ^| findstr /v "vc14\\bin"') do (>>bin.txt set /p=%%a;>lib.txt set /p=%%a;nul && set __have_openvino_flag=true + echo !__3rd_or_fd_dll_file! | findstr d\.dll>nul && set __3rd_or_fd_needed_flag=false + if "!__3rd_or_fd_needed_flag!"=="false" ( echo !__3rd_or_fd_dll_file! | findstr /v opencv>nul && set __3rd_or_fd_needed_flag=true) + echo !__3rd_or_fd_dll_file! | findstr debug\.dll>nul && set __3rd_or_fd_needed_flag=false + if "!__3rd_or_fd_needed_flag!"=="true" ( + copy /Y !__3rd_or_fd_dll_file! %__another_target_dir% + if "!__install_infos_flag!"=="info" ( echo [Installed][DLL] !__3rd_or_fd_dll_file! "--->" %__another_target_dir%) + ) + ) + if "!__have_openvino_flag!"=="true" ( + for /f "delims= " %%a in ('dir /s /b %__ultrainfer_sdk_dir% ^| findstr /e \.xml ^| findstr "openvino"') do ( + set __openvino_plugin_xml=%%a + copy /Y !__openvino_plugin_xml! %__another_target_dir% + if "!__install_infos_flag!"=="info" ( echo [Installed][XML] !__openvino_plugin_xml! 
"--->" %__another_target_dir% ) + ) + ) + @setlocal disabledelayedexpansion + goto:eof +) +goto:eof + +@rem helpers +:__print_long_line +echo ------------------------------------------------------------------------------------------------------------------------------------------------------------ +goto:eof +@rem end + +@echo on diff --git a/libs/ultrainfer/scripts/ultrainfer_init.sh b/libs/ultrainfer/scripts/ultrainfer_init.sh new file mode 100755 index 0000000000..f004298c72 --- /dev/null +++ b/libs/ultrainfer/scripts/ultrainfer_init.sh @@ -0,0 +1,61 @@ +# source this file to import libraries + +PLATFORM=`uname` +ULTRAINFER_LIBRARY_PATH=${BASH_SOURCE:-$0} +if [[ "$PLATFORM" = "Linux" ]];then + ULTRAINFER_LIBRARY_PATH=`readlink -f ${ULTRAINFER_LIBRARY_PATH}` +fi +ULTRAINFER_LIBRARY_PATH=$(cd `dirname ${ULTRAINFER_LIBRARY_PATH}`; pwd) + +echo "=============== Information ======================" +echo "UltraInfer Library Path: $ULTRAINFER_LIBRARY_PATH" +echo "Platform: $PLATFORM" +echo "==================================================" + +# Find all the .so files' path +if [[ "$(ps -a $$)" =~ "zsh" ]]; then + ALL_SO_FILES=(`find $ULTRAINFER_LIBRARY_PATH -name "*.so*"`) + ALL_DYLIB_FILES=(`find $ULTRAINFER_LIBRARY_PATH -name "*.dylib*"`) +else + ALL_SO_FILES=`find $ULTRAINFER_LIBRARY_PATH -name "*.so*"` + ALL_DYLIB_FILES=`find $ULTRAINFER_LIBRARY_PATH -name "*.dylib*"` +fi + +for SO_FILE in $ALL_SO_FILES;do + LIBS_DIRECTORIES+=(${SO_FILE%/*}) +done + +# Find all the .dylib files' path +# ALL_DYLIB_FILES=(`find $ULTRAINFER_LIBRARY_PATH -name "*.dylib*"`) +for DYLIB_FILE in $ALL_DYLIB_FILES;do + LIBS_DIRECTORIES+=(${DYLIB_FILE%/*}) +done + +# Remove the dumplicate directories +LIBS_DIRECTORIES=($(awk -v RS=' ' '!a[$1]++' <<< ${LIBS_DIRECTORIES[@]})) + +# Print the dynamic library location and output the configuration file +IMPORT_PATH="" +output_file=${ULTRAINFER_LIBRARY_PATH}/ultrainfer_libs.conf +rm -rf $output_file +for LIB_DIR in ${LIBS_DIRECTORIES[@]};do + echo "Find Library Directory: $LIB_DIR" + echo "$LIB_DIR" >> $output_file + IMPORT_PATH=${LIB_DIR}":"$IMPORT_PATH +done + +if [ -f "ascend_init.sh" ] +then + source ascend_init.sh +fi + +echo "[Execute] Will try to export all the library directories to environments, if not work, please try to export these path by your self." +PLATFORM=`uname` +if [[ "$PLATFORM" = "Linux" ]];then + NEW_LIB_PATH=$(tr ":" "\n" <<< "${IMPORT_PATH}:$LD_LIBRARY_PATH" | sort | uniq | tr "\n" ":") + export LD_LIBRARY_PATH=$NEW_LIB_PATH +fi +if [[ "$PLATFORM" = "Darwin" ]];then + NEW_LIB_PATH=$(tr ":" "\n" <<< "${IMPORT_PATH}:$DYLD_LIBRARY_PATH" | sort | uniq | tr "\n" ":") + export DYLD_LIBRARY_PATH=$NEW_LIB_PATH +fi diff --git a/libs/ultrainfer/ultrainfer/CMakeLists.txt b/libs/ultrainfer/ultrainfer/CMakeLists.txt new file mode 100755 index 0000000000..e69de29bb2 diff --git a/libs/ultrainfer/ultrainfer/benchmark/benchmark.h b/libs/ultrainfer/ultrainfer/benchmark/benchmark.h new file mode 100755 index 0000000000..e3c556525c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/benchmark/benchmark.h @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/benchmark/option.h" +#include "ultrainfer/benchmark/results.h" +#include "ultrainfer/core/config.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/utils/utils.h" + +#ifdef ENABLE_BENCHMARK +#define __RUNTIME_PROFILE_LOOP_BEGIN(option, base_loop) \ + int __p_loop = (base_loop); \ + const bool __p_enable_profile = option.enable_profile; \ + const bool __p_include_h2d_d2h = option.include_h2d_d2h; \ + const int __p_repeats = option.repeats; \ + const int __p_warmup = option.warmup; \ + if (__p_enable_profile && (!__p_include_h2d_d2h)) { \ + __p_loop = (__p_repeats) + (__p_warmup); \ + FDINFO << option << std::endl; \ + } \ + TimeCounter __p_tc; \ + bool __p_tc_start = false; \ + for (int __p_i = 0; __p_i < __p_loop; ++__p_i) { \ + if (__p_i >= (__p_warmup) && (!__p_tc_start)) { \ + __p_tc.Start(); \ + __p_tc_start = true; \ + } + +#define __RUNTIME_PROFILE_LOOP_END(result) \ + } \ + if ((__p_enable_profile && (!__p_include_h2d_d2h))) { \ + if (__p_tc_start) { \ + __p_tc.End(); \ + double __p_tc_duration = __p_tc.Duration(); \ + result.time_of_runtime = \ + __p_tc_duration / static_cast(__p_repeats); \ + } \ + } + +#define __RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN(option, base_loop) \ + int __p_loop_h = (base_loop); \ + const bool __p_enable_profile_h = option.enable_profile; \ + const bool __p_include_h2d_d2h_h = option.include_h2d_d2h; \ + const int __p_repeats_h = option.repeats; \ + const int __p_warmup_h = option.warmup; \ + if (__p_enable_profile_h && __p_include_h2d_d2h_h) { \ + __p_loop_h = (__p_repeats_h) + (__p_warmup_h); \ + FDINFO << option << std::endl; \ + } \ + TimeCounter __p_tc_h; \ + bool __p_tc_start_h = false; \ + for (int __p_i_h = 0; __p_i_h < __p_loop_h; ++__p_i_h) { \ + if (__p_i_h >= (__p_warmup_h) && (!__p_tc_start_h)) { \ + __p_tc_h.Start(); \ + __p_tc_start_h = true; \ + } + +#define __RUNTIME_PROFILE_LOOP_H2D_D2H_END(result) \ + } \ + if ((__p_enable_profile_h && __p_include_h2d_d2h_h)) { \ + if (__p_tc_start_h) { \ + __p_tc_h.End(); \ + double __p_tc_duration_h = __p_tc_h.Duration(); \ + result.time_of_runtime = \ + __p_tc_duration_h / static_cast(__p_repeats_h); \ + } \ + } +#else +#define __RUNTIME_PROFILE_LOOP_BEGIN(option, base_loop) \ + for (int __p_i = 0; __p_i < (base_loop); ++__p_i) { +#define __RUNTIME_PROFILE_LOOP_END(result) } +#define __RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN(option, base_loop) \ + for (int __p_i_h = 0; __p_i_h < (base_loop); ++__p_i_h) { +#define __RUNTIME_PROFILE_LOOP_H2D_D2H_END(result) } +#endif diff --git a/libs/ultrainfer/ultrainfer/benchmark/option.h b/libs/ultrainfer/ultrainfer/benchmark/option.h new file mode 100755 index 0000000000..eb45a1fb48 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/benchmark/option.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
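
When ENABLE_BENCHMARK is defined, the loop macros above replace a single inference call with warmup + repeats iterations, start the timer only after the warmup iterations have run, and write the averaged duration into result.time_of_runtime. The Python sketch below restates that control flow without the macro plumbing; run_once stands in for one backend inference:

# Python restatement of the __RUNTIME_PROFILE_LOOP_* timing logic (run_once is a stand-in).
import time

def profile_runtime(run_once, warmup=50, repeats=100):
    start = None
    for i in range(warmup + repeats):
        if i >= warmup and start is None:
            start = time.perf_counter()   # timer starts once warmup is done
        run_once()
    # Average over the timed repeats only, mirroring result.time_of_runtime.
    return (time.perf_counter() - start) / repeats
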
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +namespace ultrainfer { + +/** \brief All C++ UltraInfer benchmark profile APIs are defined inside this + * namespace + * + */ +namespace benchmark { + +// @brief Option object used to control the behavior of the benchmark profiling. +// +struct BenchmarkOption { + int warmup = 50; ///< Warmup for backend inference. + int repeats = 100; ///< Repeats for backend inference. + bool enable_profile = false; ///< Whether to use profile or not. + bool include_h2d_d2h = false; ///< Whether to include time of H2D_D2H for time + ///< of runtime. // NOLINT + + friend std::ostream &operator<<(std::ostream &output, + const BenchmarkOption &option) { + if (!option.include_h2d_d2h) { + output << "Running profiling for Runtime " + << "without H2D and D2H, "; + } else { + output << "Running profiling for Runtime " + << "with H2D and D2H, "; + } + output << "Repeats: " << option.repeats << ", " + << "Warmup: " << option.warmup; + return output; + } +}; + +} // namespace benchmark +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/benchmark/results.h b/libs/ultrainfer/ultrainfer/benchmark/results.h new file mode 100755 index 0000000000..527eb0a885 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/benchmark/results.h @@ -0,0 +1,28 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +namespace ultrainfer { +namespace benchmark { + +/*! @brief Result object used to record the time of runtime after benchmark + * profiling is done. + */ +struct BenchmarkResult { + ///< Means pure_backend_time+time_of_h2d_d2h(if include_h2d_d2h=true). + double time_of_runtime = 0.0f; +}; + +} // namespace benchmark +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/benchmark/utils.cc b/libs/ultrainfer/ultrainfer/benchmark/utils.cc new file mode 100755 index 0000000000..d3b0896da7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/benchmark/utils.cc @@ -0,0 +1,908 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#ifdef __linux__ +#include +#endif +#include + +#include "ultrainfer/benchmark/utils.h" +#include "ultrainfer/utils/path.h" +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) +#include "ultrainfer/vision/utils/utils.h" +#endif + +namespace ultrainfer { +namespace benchmark { + +#if defined(ENABLE_BENCHMARK) +std::string Strip(const std::string &str, char ch) { + int i = 0; + while (str[i] == ch) { + i++; + } + int j = str.size() - 1; + while (str[j] == ch) { + j--; + } + return str.substr(i, j + 1 - i); +} + +void Split(const std::string &s, std::vector &tokens, char delim) { + tokens.clear(); + size_t lastPos = s.find_first_not_of(delim, 0); + size_t pos = s.find(delim, lastPos); + while (lastPos != std::string::npos) { + tokens.emplace_back(s.substr(lastPos, pos - lastPos)); + lastPos = s.find_first_not_of(delim, pos); + pos = s.find(delim, lastPos); + } + return; +} + +ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id) + : is_supported_(false), sampling_interval_(sampling_interval_ms), + gpu_id_(gpu_id) { +#ifdef __linux__ + is_supported_ = true; +#else + is_supported_ = false; +#endif + if (!is_supported_) { + FDASSERT(false, "Currently ResourceUsageMonitor only supports Linux.") + return; + } +} + +void ResourceUsageMonitor::Start() { + if (!is_supported_) { + return; + } + if (check_memory_thd_ != nullptr) { + FDINFO << "Memory monitoring has already started!" << std::endl; + return; + } + FDINFO << "Start monitoring memory!" << std::endl; + stop_signal_ = false; + check_memory_thd_.reset(new std::thread(([this]() { + // Note we retrieve the memory usage at the very beginning of the thread. + while (true) { +#ifdef __linux__ + rusage res; + if (getrusage(RUSAGE_SELF, &res) == 0) { + max_cpu_mem_ = + std::max(max_cpu_mem_, static_cast(res.ru_maxrss / 1024.0)); + } +#endif +#if defined(WITH_GPU) + std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_); + // get max_gpu_mem and max_gpu_util + std::vector gpu_tokens; + Split(gpu_mem_info, gpu_tokens, ','); + max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6])); + max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7])); +#endif + if (stop_signal_) { + break; + } + std::this_thread::sleep_for( + std::chrono::milliseconds(sampling_interval_)); + } + }))); +} + +void ResourceUsageMonitor::Stop() { + if (!is_supported_) { + return; + } + if (check_memory_thd_ == nullptr) { + FDINFO << "Memory monitoring hasn't started yet or has stopped!" + << std::endl; + return; + } + FDINFO << "Stop monitoring memory!" << std::endl; + StopInternal(); +} + +void ResourceUsageMonitor::StopInternal() { + stop_signal_ = true; + if (check_memory_thd_ == nullptr) { + return; + } + if (check_memory_thd_ != nullptr) { + check_memory_thd_->join(); + } + check_memory_thd_.reset(nullptr); +} + +std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) { + std::string result = ""; +#if defined(__linux__) && defined(WITH_GPU) + std::string command = "nvidia-smi --id=" + std::to_string(device_id) + + " --query-gpu=index,uuid,name,timestamp,memory.total," + "memory.free,memory.used,utilization.gpu,utilization." 
+ "memory --format=csv,noheader,nounits"; + FILE *pp = popen(command.data(), "r"); + if (!pp) + return ""; + char tmp[1024]; + + while (fgets(tmp, sizeof(tmp), pp) != NULL) { + result += tmp; + } + pclose(pp); +#else + FDASSERT(false, + "Currently collect gpu memory info only supports Linux in GPU.") +#endif + return result; +} +#endif // ENABLE_BENCHMARK + +/// Utils for precision evaluation +#if defined(ENABLE_BENCHMARK) +static const char KEY_VALUE_SEP = '#'; +static const char VALUE_SEP = ','; + +std::vector ReadLines(const std::string &path) { + std::ifstream fin(path); + std::vector lines; + std::string line; + if (fin.is_open()) { + while (getline(fin, line)) { + lines.push_back(line); + } + } else { + FDERROR << "Failed to open file " << path << std::endl; + std::abort(); + } + fin.close(); + return lines; +} + +std::map> +SplitDataLine(const std::string &data_line) { + std::map> dict; + std::vector tokens, value_tokens; + Split(data_line, tokens, KEY_VALUE_SEP); + std::string key = tokens[0]; + std::string value = tokens[1]; + Split(value, value_tokens, VALUE_SEP); + dict[key] = value_tokens; + return dict; +} + +bool ResultManager::SaveFDTensor(const FDTensor &tensor, + const std::string &path) { + if (tensor.CpuData() == nullptr || tensor.Numel() <= 0) { + FDERROR << "Input tensor is empty!" << std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + if (tensor.Dtype() != FDDataType::FP32 && + tensor.Dtype() != FDDataType::INT32 && + tensor.Dtype() != FDDataType::INT64) { + FDERROR << "Only support FP32/INT32/INT64 now, but got " + << Str(tensor.dtype) << std::endl; + return false; + } + // name + fs << "name" << KEY_VALUE_SEP << tensor.name << "\n"; + // shape + fs << "shape" << KEY_VALUE_SEP; + for (int i = 0; i < tensor.shape.size(); ++i) { + if (i < tensor.shape.size() - 1) { + fs << tensor.shape[i] << VALUE_SEP; + } else { + fs << tensor.shape[i]; + } + } + fs << "\n"; + // dtype + fs << "dtype" << KEY_VALUE_SEP << Str(tensor.dtype) << "\n"; + // data + fs << "data" << KEY_VALUE_SEP; + const void *data_ptr = tensor.CpuData(); + for (int i = 0; i < tensor.Numel(); ++i) { + if (tensor.Dtype() == FDDataType::INT64) { + if (i < tensor.Numel() - 1) { + fs << (static_cast(data_ptr))[i] << VALUE_SEP; + } else { + fs << (static_cast(data_ptr))[i]; + } + } else if (tensor.Dtype() == FDDataType::INT32) { + if (i < tensor.Numel() - 1) { + fs << (static_cast(data_ptr))[i] << VALUE_SEP; + } else { + fs << (static_cast(data_ptr))[i]; + } + } else { // FP32 + if (i < tensor.Numel() - 1) { + fs << (static_cast(data_ptr))[i] << VALUE_SEP; + } else { + fs << (static_cast(data_ptr))[i]; + } + } + } + fs << "\n"; + fs.close(); + return true; +} + +bool ResultManager::LoadFDTensor(FDTensor *tensor, const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map> data; + // name + data = SplitDataLine(lines[0]); + tensor->name = data.begin()->first; + // shape + data = SplitDataLine(lines[1]); + tensor->shape.clear(); + for (const auto &s : data.begin()->second) { + tensor->shape.push_back(std::stol(s)); + } + // dtype + data = SplitDataLine(lines[2]); + if (data.begin()->second.at(0) == Str(FDDataType::INT64)) { + tensor->dtype = FDDataType::INT64; + } else if (data.begin()->second.at(0) == Str(FDDataType::INT32)) { + tensor->dtype = 
FDDataType::INT32; + } else if (data.begin()->second.at(0) == Str(FDDataType::FP32)) { + tensor->dtype = FDDataType::FP32; + } else { + FDERROR << "Only support FP32/INT64/INT32 now, but got " + << data.begin()->second.at(0) << std::endl; + return false; + } + // data + data = SplitDataLine(lines[3]); + tensor->Allocate(tensor->shape, tensor->dtype, tensor->name); + if (tensor->dtype == FDDataType::INT64) { + int64_t *mutable_data_ptr = static_cast(tensor->MutableData()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + mutable_data_ptr[i] = std::stol(data.begin()->second[i]); + } + } else if (tensor->dtype == FDDataType::INT32) { + int32_t *mutable_data_ptr = static_cast(tensor->MutableData()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + mutable_data_ptr[i] = std::stoi(data.begin()->second[i]); + } + } else { // FP32 + float *mutable_data_ptr = static_cast(tensor->MutableData()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + mutable_data_ptr[i] = std::stof(data.begin()->second[i]); + } + } + return true; +} + +TensorDiff ResultManager::CalculateDiffStatis(const FDTensor &lhs, + const FDTensor &rhs) { + if (lhs.Numel() != rhs.Numel() || lhs.Dtype() != rhs.Dtype()) { + FDASSERT(false, + "The size and dtype of input FDTensor must be equal!" + " But got size %d, %d, dtype %s, %s", + lhs.Numel(), rhs.Numel(), Str(lhs.Dtype()).c_str(), + Str(rhs.Dtype()).c_str()) + } + FDDataType dtype = lhs.Dtype(); + int numel = lhs.Numel(); + if (dtype != FDDataType::FP32 && dtype != FDDataType::INT64 && + dtype != FDDataType::INT32) { + FDASSERT(false, "Only support FP32/INT64/INT32 now, but got %s", + Str(dtype).c_str()) + } + if (dtype == FDDataType::INT64) { + std::vector tensor_diff(numel); + const int64_t *lhs_data_ptr = static_cast(lhs.CpuData()); + const int64_t *rhs_data_ptr = static_cast(rhs.CpuData()); + for (int i = 0; i < numel; ++i) { + tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i]; + } + TensorDiff diff; + CalculateStatisInfo(tensor_diff.data(), numel, &(diff.data.mean), + &(diff.data.max), &(diff.data.min)); + return diff; + } else if (dtype == FDDataType::INT32) { + std::vector tensor_diff(numel); + const int32_t *lhs_data_ptr = static_cast(lhs.CpuData()); + const int32_t *rhs_data_ptr = static_cast(rhs.CpuData()); + for (int i = 0; i < numel; ++i) { + tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i]; + } + TensorDiff diff; + CalculateStatisInfo(tensor_diff.data(), numel, &(diff.data.mean), + &(diff.data.max), &(diff.data.min)); + return diff; + } else { // FP32 + std::vector tensor_diff(numel); + const float *lhs_data_ptr = static_cast(lhs.CpuData()); + const float *rhs_data_ptr = static_cast(rhs.CpuData()); + for (int i = 0; i < numel; ++i) { + tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i]; + } + TensorDiff diff; + CalculateStatisInfo(tensor_diff.data(), numel, &(diff.data.mean), + &(diff.data.max), &(diff.data.min)); + return diff; + } +} + +void ResultManager::SaveBenchmarkResult(const std::string &res, + const std::string &path) { + if (path.empty()) { + FDERROR << "Benchmark data path can not be empty!" 
<< std::endl; + return; + } + auto openmode = std::ios::app; + std::ofstream fs(path, openmode); + if (!fs.is_open()) { + FDERROR << "Fail to open result file: " << path << std::endl; + } + fs << res; + fs.close(); +} + +bool ResultManager::LoadBenchmarkConfig( + const std::string &path, + std::unordered_map *config_info) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + for (auto line : lines) { + std::vector tokens; + Split(line, tokens, ':'); + (*config_info)[tokens[0]] = Strip(tokens[1], ' '); + } + return true; +} + +std::vector> +ResultManager::GetInputShapes(const std::string &raw_shapes) { + std::vector> shapes; + std::vector shape_tokens; + Split(raw_shapes, shape_tokens, ':'); + for (auto str_shape : shape_tokens) { + std::vector shape; + std::string tmp_str = str_shape; + while (!tmp_str.empty()) { + int dim = atoi(tmp_str.data()); + shape.push_back(dim); + size_t next_offset = tmp_str.find(","); + if (next_offset == std::string::npos) { + break; + } else { + tmp_str = tmp_str.substr(next_offset + 1); + } + } + shapes.push_back(shape); + } + return shapes; +} + +std::vector +ResultManager::GetInputNames(const std::string &raw_names) { + std::vector names_tokens; + Split(raw_names, names_tokens, ':'); + return names_tokens; +} + +std::vector ResultManager::SplitStr(const std::string &raw_str, + char delim) { + std::vector str_tokens; + Split(raw_str, str_tokens, delim); + return str_tokens; +} + +std::vector +ResultManager::GetInputDtypes(const std::string &raw_dtypes) { + std::vector dtypes; + std::vector dtypes_tokens; + Split(raw_dtypes, dtypes_tokens, ':'); + for (auto dtype : dtypes_tokens) { + if (dtype == "FP32") { + dtypes.push_back(FDDataType::FP32); + } else if (dtype == "INT32") { + dtypes.push_back(FDDataType::INT32); + } else if (dtype == "INT64") { + dtypes.push_back(FDDataType::INT64); + } else if (dtype == "INT8") { + dtypes.push_back(FDDataType::INT8); + } else if (dtype == "UINT8") { + dtypes.push_back(FDDataType::UINT8); + } else if (dtype == "FP16") { + dtypes.push_back(FDDataType::FP16); + } else if (dtype == "FP64") { + dtypes.push_back(FDDataType::FP64); + } else { + dtypes.push_back(FDDataType::FP32); // default + } + } + return dtypes; +} + +#if defined(ENABLE_VISION) +bool ResultManager::SaveDetectionResult(const vision::DetectionResult &res, + const std::string &path) { + if (res.boxes.empty()) { + FDERROR << "DetectionResult can not be empty!" 
<< std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // boxes + fs << "boxes" << KEY_VALUE_SEP; + for (int i = 0; i < res.boxes.size(); ++i) { + for (int j = 0; j < 4; ++j) { + if ((i == res.boxes.size() - 1) && (j == 3)) { + fs << res.boxes[i][j]; + } else { + fs << res.boxes[i][j] << VALUE_SEP; + } + } + } + fs << "\n"; + // scores + fs << "scores" << KEY_VALUE_SEP; + for (int i = 0; i < res.scores.size(); ++i) { + if (i < res.scores.size() - 1) { + fs << res.scores[i] << VALUE_SEP; + } else { + fs << res.scores[i]; + } + } + fs << "\n"; + // label_ids + fs << "label_ids" << KEY_VALUE_SEP; + for (int i = 0; i < res.label_ids.size(); ++i) { + if (i < res.label_ids.size() - 1) { + fs << res.label_ids[i] << VALUE_SEP; + } else { + fs << res.label_ids[i]; + } + } + fs << "\n"; + // TODO(qiuyanjun): dump masks + fs.close(); + return true; +} + +bool ResultManager::SaveClassifyResult(const vision::ClassifyResult &res, + const std::string &path) { + if (res.label_ids.empty()) { + FDERROR << "ClassifyResult can not be empty!" << std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // label_ids + fs << "label_ids" << KEY_VALUE_SEP; + for (int i = 0; i < res.label_ids.size(); ++i) { + if (i < res.label_ids.size() - 1) { + fs << res.label_ids[i] << VALUE_SEP; + } else { + fs << res.label_ids[i]; + } + } + fs << "\n"; + // scores + fs << "scores" << KEY_VALUE_SEP; + for (int i = 0; i < res.scores.size(); ++i) { + if (i < res.scores.size() - 1) { + fs << res.scores[i] << VALUE_SEP; + } else { + fs << res.scores[i]; + } + } + fs << "\n"; + fs.close(); + return true; +} + +bool ResultManager::SaveSegmentationResult( + const vision::SegmentationResult &res, const std::string &path) { + if (res.label_map.empty()) { + FDERROR << "SegmentationResult can not be empty!" << std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // label_map + fs << "label_map" << KEY_VALUE_SEP; + for (int i = 0; i < res.label_map.size(); ++i) { + if (i < res.label_map.size() - 1) { + fs << static_cast(res.label_map[i]) << VALUE_SEP; + } else { + fs << static_cast(res.label_map[i]); + } + } + fs << "\n"; + // score_map + if (res.contain_score_map) { + fs << "score_map" << KEY_VALUE_SEP; + for (int i = 0; i < res.score_map.size(); ++i) { + if (i < res.score_map.size() - 1) { + fs << res.score_map[i] << VALUE_SEP; + } else { + fs << res.score_map[i]; + } + } + fs << "\n"; + } + fs.close(); + return true; +} + +bool ResultManager::SaveOCRDetResult(const std::vector> &res, + const std::string &path) { + if (res.empty()) { + FDERROR << "OCRDetResult can not be empty!" 
<< std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // boxes + fs << "boxes" << KEY_VALUE_SEP; + for (int i = 0; i < res.size(); ++i) { + for (int j = 0; j < 8; ++j) { + if ((i == res.size() - 1) && (j == 7)) { + fs << res[i][j]; + } else { + fs << res[i][j] << VALUE_SEP; + } + } + } + fs << "\n"; + fs.close(); + return true; +} + +bool ResultManager::SaveMattingResult(const vision::MattingResult &res, + const std::string &path) { + if (res.alpha.empty()) { + FDERROR << "MattingResult can not be empty!" << std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // alpha + fs << "alpha" << KEY_VALUE_SEP; + for (int i = 0; i < res.alpha.size(); ++i) { + if (i < res.alpha.size() - 1) { + fs << res.alpha[i] << VALUE_SEP; + } else { + fs << res.alpha[i]; + } + } + fs << "\n"; + // foreground + if (res.contain_foreground) { + fs << "foreground" << KEY_VALUE_SEP; + for (int i = 0; i < res.foreground.size(); ++i) { + if (i < res.foreground.size() - 1) { + fs << res.foreground[i] << VALUE_SEP; + } else { + fs << res.foreground[i]; + } + } + fs << "\n"; + } + fs.close(); + return true; +} + +bool ResultManager::LoadDetectionResult(vision::DetectionResult *res, + const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map> data; + + // boxes + data = SplitDataLine(lines[0]); + int boxes_num = data.begin()->second.size() / 4; + res->Resize(boxes_num); + for (int i = 0; i < boxes_num; ++i) { + res->boxes[i][0] = std::stof(data.begin()->second[i * 4 + 0]); + res->boxes[i][1] = std::stof(data.begin()->second[i * 4 + 1]); + res->boxes[i][2] = std::stof(data.begin()->second[i * 4 + 2]); + res->boxes[i][3] = std::stof(data.begin()->second[i * 4 + 3]); + } + // scores + data = SplitDataLine(lines[1]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->scores[i] = std::stof(data.begin()->second[i]); + } + // label_ids + data = SplitDataLine(lines[2]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->label_ids[i] = std::stoi(data.begin()->second[i]); + } + // TODO(qiuyanjun): load masks + return true; +} + +bool ResultManager::LoadClassifyResult(vision::ClassifyResult *res, + const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map> data; + // label_ids + data = SplitDataLine(lines[0]); + res->Resize(data.begin()->second.size()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->label_ids[i] = std::stoi(data.begin()->second[i]); + } + // scores + data = SplitDataLine(lines[1]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->scores[i] = std::stof(data.begin()->second[i]); + } + return true; +} + +bool ResultManager::LoadSegmentationResult(vision::SegmentationResult *res, + const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + if (lines.size() > 1) { + res->contain_score_map = true; + } + std::map> data; + // label_map + data = SplitDataLine(lines[0]); + res->Resize(data.begin()->second.size()); + for (int i = 0; i 
< data.begin()->second.size(); ++i) { + res->label_map[i] = std::stoi(data.begin()->second[i]); + } + // score_map + if (lines.size() > 1) { + data = SplitDataLine(lines[1]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->score_map[i] = std::stof(data.begin()->second[i]); + } + } + return true; +} + +bool ResultManager::LoadOCRDetResult(std::vector> *res, + const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map> data; + // boxes + data = SplitDataLine(lines[0]); + int boxes_num = data.begin()->second.size() / 8; + res->resize(boxes_num); + for (int i = 0; i < boxes_num; ++i) { + for (int j = 0; j < 8; ++j) { + (*res)[i][j] = std::stoi(data.begin()->second[i * 8 + j]); + } + } + return true; +} + +bool ResultManager::LoadMattingResult(vision::MattingResult *res, + const std::string &path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + if (lines.size() > 1) { + res->contain_foreground = true; + } + std::map> data; + // alpha + data = SplitDataLine(lines[0]); + res->Resize(data.begin()->second.size()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->alpha[i] = std::stof(data.begin()->second[i]); + } + // foreground + if (lines.size() > 1) { + data = SplitDataLine(lines[1]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->foreground[i] = std::stof(data.begin()->second[i]); + } + } + return true; +} + +DetectionDiff +ResultManager::CalculateDiffStatis(const vision::DetectionResult &lhs, + const vision::DetectionResult &rhs, + const float &score_threshold) { + vision::DetectionResult lhs_sort = lhs; + vision::DetectionResult rhs_sort = rhs; + // lex sort by x(w) & y(h) + vision::utils::LexSortDetectionResultByXY(&lhs_sort); + vision::utils::LexSortDetectionResultByXY(&rhs_sort); + // get value diff & trunc it by score_threshold + const int boxes_num = std::min(lhs_sort.boxes.size(), rhs_sort.boxes.size()); + std::vector boxes_diff; + std::vector scores_diff; + std::vector labels_diff; + // TODO(qiuyanjun): process the diff of masks. 
+ for (int i = 0; i < boxes_num; ++i) { + if (lhs_sort.scores[i] > score_threshold && + rhs_sort.scores[i] > score_threshold) { + scores_diff.push_back(lhs_sort.scores[i] - rhs_sort.scores[i]); + labels_diff.push_back(lhs_sort.label_ids[i] - rhs_sort.label_ids[i]); + boxes_diff.push_back(lhs_sort.boxes[i][0] - rhs_sort.boxes[i][0]); + boxes_diff.push_back(lhs_sort.boxes[i][1] - rhs_sort.boxes[i][1]); + boxes_diff.push_back(lhs_sort.boxes[i][2] - rhs_sort.boxes[i][2]); + boxes_diff.push_back(lhs_sort.boxes[i][3] - rhs_sort.boxes[i][3]); + } + } + FDASSERT(boxes_diff.size() > 0, + "Can't get any valid boxes while score_threshold is %f, " + "The boxes.size of lhs is %d, the boxes.size of rhs is %d", + score_threshold, lhs_sort.boxes.size(), rhs_sort.boxes.size()) + + DetectionDiff diff; + CalculateStatisInfo(boxes_diff.data(), boxes_diff.size(), + &(diff.boxes.mean), &(diff.boxes.max), + &(diff.boxes.min)); + CalculateStatisInfo(scores_diff.data(), scores_diff.size(), + &(diff.scores.mean), &(diff.scores.max), + &(diff.scores.min)); + CalculateStatisInfo(labels_diff.data(), labels_diff.size(), + &(diff.labels.mean), &(diff.labels.max), + &(diff.labels.min)); + return diff; +} + +ClassifyDiff +ResultManager::CalculateDiffStatis(const vision::ClassifyResult &lhs, + const vision::ClassifyResult &rhs) { + const int class_nums = std::min(lhs.label_ids.size(), rhs.label_ids.size()); + std::vector scores_diff; + std::vector labels_diff; + for (int i = 0; i < class_nums; ++i) { + scores_diff.push_back(lhs.scores[i] - rhs.scores[i]); + labels_diff.push_back(lhs.label_ids[i] - rhs.label_ids[i]); + } + + ClassifyDiff diff; + CalculateStatisInfo(scores_diff.data(), scores_diff.size(), + &(diff.scores.mean), &(diff.scores.max), + &(diff.scores.min)); + CalculateStatisInfo(labels_diff.data(), labels_diff.size(), + &(diff.labels.mean), &(diff.labels.max), + &(diff.labels.min)); + return diff; +} + +SegmentationDiff +ResultManager::CalculateDiffStatis(const vision::SegmentationResult &lhs, + const vision::SegmentationResult &rhs) { + const int pixel_nums = std::min(lhs.label_map.size(), rhs.label_map.size()); + std::vector labels_diff; + std::vector scores_diff; + for (int i = 0; i < pixel_nums; ++i) { + labels_diff.push_back(lhs.label_map[i] - rhs.label_map[i]); + if (lhs.contain_score_map && rhs.contain_score_map) { + scores_diff.push_back(lhs.score_map[i] - rhs.score_map[i]); + } + } + SegmentationDiff diff; + CalculateStatisInfo(labels_diff.data(), labels_diff.size(), + &(diff.labels.mean), &(diff.labels.max), + &(diff.labels.min)); + if (lhs.contain_score_map && rhs.contain_score_map) { + CalculateStatisInfo(scores_diff.data(), scores_diff.size(), + &(diff.scores.mean), &(diff.scores.max), + &(diff.scores.min)); + } + return diff; +} + +OCRDetDiff +ResultManager::CalculateDiffStatis(const std::vector> &lhs, + const std::vector> &rhs) { + const int boxes_nums = std::min(lhs.size(), rhs.size()); + std::vector> lhs_sort = lhs; + std::vector> rhs_sort = rhs; + // lex sort by x(w) & y(h) + vision::utils::LexSortOCRDetResultByXY(&lhs_sort); + vision::utils::LexSortOCRDetResultByXY(&rhs_sort); + // get value diff + const int boxes_num = std::min(lhs_sort.size(), rhs_sort.size()); + std::vector boxes_diff; + for (int i = 0; i < boxes_num; ++i) { + for (int j = 0; j < 8; ++j) { + boxes_diff.push_back(lhs_sort[i][j] - rhs_sort[i][j]); + } + } + + OCRDetDiff diff; + CalculateStatisInfo(boxes_diff.data(), boxes_diff.size(), + &(diff.boxes.mean), &(diff.boxes.max), + &(diff.boxes.min)); + return diff; +} + 
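The Save*/Load*/CalculateDiffStatis helpers above are aimed at precision regression checks: dump a result once, then compare later runs against it. A minimal sketch of that workflow for detection results, assuming a build with ENABLE_BENCHMARK and ENABLE_VISION; the function name, file name, and tolerances below are illustrative only:

    #include <cmath>
    #include "ultrainfer/benchmark/utils.h"

    bool DetectionMatchesBaseline(
        const ultrainfer::vision::DetectionResult &current) {
      namespace bm = ultrainfer::benchmark;
      ultrainfer::vision::DetectionResult baseline;
      if (!bm::ResultManager::LoadDetectionResult(&baseline, "baseline_det.txt")) {
        return false;  // no baseline recorded yet
      }
      // Boxes are lex-sorted internally, and only boxes whose scores exceed the
      // threshold on both sides are compared.
      bm::DetectionDiff diff =
          bm::ResultManager::CalculateDiffStatis(baseline, current, 0.3f);
      return std::fabs(diff.boxes.max) < 1.0 && std::fabs(diff.scores.max) < 1e-4;
    }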
+MattingDiff
+ResultManager::CalculateDiffStatis(const vision::MattingResult &lhs,
+                                   const vision::MattingResult &rhs) {
+  const int pixel_nums = std::min(lhs.alpha.size(), rhs.alpha.size());
+  std::vector<float> alpha_diff;
+  std::vector<float> foreground_diff;
+  for (int i = 0; i < pixel_nums; ++i) {
+    alpha_diff.push_back(lhs.alpha[i] - rhs.alpha[i]);
+    if (lhs.contain_foreground && rhs.contain_foreground) {
+      foreground_diff.push_back(lhs.foreground[i] - rhs.foreground[i]);
+    }
+  }
+  MattingDiff diff;
+  CalculateStatisInfo<float>(alpha_diff.data(), alpha_diff.size(),
+                             &(diff.alpha.mean), &(diff.alpha.max),
+                             &(diff.alpha.min));
+  if (lhs.contain_foreground && rhs.contain_foreground) {
+    CalculateStatisInfo<float>(foreground_diff.data(), foreground_diff.size(),
+                               &(diff.foreground.mean), &(diff.foreground.max),
+                               &(diff.foreground.min));
+  }
+  return diff;
+}
+
+#endif // ENABLE_VISION
+#endif // ENABLE_BENCHMARK
+
+} // namespace benchmark
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/benchmark/utils.h b/libs/ultrainfer/ultrainfer/benchmark/utils.h
new file mode 100755
index 0000000000..db8c0d1ec2
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/benchmark/utils.h
@@ -0,0 +1,204 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "ultrainfer/core/fd_tensor.h"
+#include "ultrainfer/utils/utils.h"
+#include <memory>
+#include <thread> // NOLINT
+#include <unordered_map>
+#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
+#include "ultrainfer/vision/common/result.h"
+#endif
+
+namespace ultrainfer {
+namespace benchmark {
+
+#if defined(ENABLE_BENCHMARK)
+/*! @brief ResourceUsageMonitor object used to collect memory usage info.
+ */
+class ULTRAINFER_DECL ResourceUsageMonitor {
+public:
+  /** \brief Set sampling_interval_ms and gpu_id for ResourceUsageMonitor.
+   *
+   * \param[in] sampling_interval_ms How often to collect memory info, in ms.
+   * \param[in] gpu_id Device (GPU) id, default 0.
+ */ + explicit ResourceUsageMonitor(int sampling_interval_ms, int gpu_id = 0); + + ~ResourceUsageMonitor() { StopInternal(); } + + /// Start memory info collect + void Start(); + /// Stop memory info collect + void Stop(); + /// Get maximum cpu memory usage + float GetMaxCpuMem() const { + if (!is_supported_ || check_memory_thd_ == nullptr) { + return -1.0f; + } + return max_cpu_mem_; + } + /// Get maximum gpu memory usage + float GetMaxGpuMem() const { + if (!is_supported_ || check_memory_thd_ == nullptr) { + return -1.0f; + } + return max_gpu_mem_; + } + /// Get maximum gpu util + float GetMaxGpuUtil() const { + if (!is_supported_ || check_memory_thd_ == nullptr) { + return -1.0f; + } + return max_gpu_util_; + } + + ResourceUsageMonitor(ResourceUsageMonitor &) = delete; + ResourceUsageMonitor &operator=(const ResourceUsageMonitor &) = delete; + ResourceUsageMonitor(ResourceUsageMonitor &&) = delete; + ResourceUsageMonitor &operator=(const ResourceUsageMonitor &&) = delete; + +private: + void StopInternal(); + // Get current gpu memory info + std::string GetCurrentGpuMemoryInfo(int device_id); + + bool is_supported_ = false; + bool stop_signal_ = false; + const int sampling_interval_; + float max_cpu_mem_ = 0.0f; // MB + float max_gpu_mem_ = 0.0f; // MB + float max_gpu_util_ = 0.0f; + const int gpu_id_ = 0; + std::unique_ptr check_memory_thd_ = nullptr; +}; + +// Remove the ch characters at both ends of str +ULTRAINFER_DECL std::string Strip(const std::string &str, char ch = ' '); + +// Split string +ULTRAINFER_DECL void Split(const std::string &s, + std::vector &tokens, char delim = ' '); + +/// Diff values for precision evaluation +struct ULTRAINFER_DECL BaseDiff {}; + +struct ULTRAINFER_DECL EvalStatis { + double mean = -1.0; + double min = -1.0; + double max = -1.0; +}; + +struct ULTRAINFER_DECL TensorDiff : public BaseDiff { + EvalStatis data; +}; + +#if defined(ENABLE_VISION) +struct ULTRAINFER_DECL DetectionDiff : public BaseDiff { + EvalStatis boxes; + EvalStatis scores; + EvalStatis labels; +}; + +struct ULTRAINFER_DECL ClassifyDiff : public BaseDiff { + EvalStatis scores; + EvalStatis labels; +}; + +struct ULTRAINFER_DECL SegmentationDiff : public BaseDiff { + EvalStatis scores; + EvalStatis labels; +}; + +struct ULTRAINFER_DECL OCRDetDiff : public BaseDiff { + EvalStatis boxes; +}; + +struct ULTRAINFER_DECL MattingDiff : public BaseDiff { + EvalStatis alpha; + EvalStatis foreground; +}; + +#endif // ENABLE_VISION +#endif // ENABLE_BENCHMARK + +/// Utils for precision evaluation +struct ULTRAINFER_DECL ResultManager { +#if defined(ENABLE_BENCHMARK) + /// Save & Load functions for FDTensor result. + static bool SaveFDTensor(const FDTensor &tensor, const std::string &path); + static bool LoadFDTensor(FDTensor *tensor, const std::string &path); + /// Calculate diff value between two FDTensor results. 
+ static TensorDiff CalculateDiffStatis(const FDTensor &lhs, + const FDTensor &rhs); + /// Save Benchmark data + static void SaveBenchmarkResult(const std::string &res, + const std::string &path); + /// Load Benchmark config + static bool LoadBenchmarkConfig( + const std::string &path, + std::unordered_map *config_info); + /// Get Input Shapes + static std::vector> + GetInputShapes(const std::string &raw_shapes); + /// Get Input Names + static std::vector GetInputNames(const std::string &raw_names); + /// Get Input Dtypes + static std::vector GetInputDtypes(const std::string &raw_dtypes); + /// Split string + static std::vector SplitStr(const std::string &raw_str, + char delim = ':'); +#if defined(ENABLE_VISION) + /// Save & Load functions for basic results. + static bool SaveDetectionResult(const vision::DetectionResult &res, + const std::string &path); + static bool LoadDetectionResult(vision::DetectionResult *res, + const std::string &path); + static bool SaveClassifyResult(const vision::ClassifyResult &res, + const std::string &path); + static bool LoadClassifyResult(vision::ClassifyResult *res, + const std::string &path); + static bool SaveSegmentationResult(const vision::SegmentationResult &res, + const std::string &path); + static bool LoadSegmentationResult(vision::SegmentationResult *res, + const std::string &path); + static bool SaveOCRDetResult(const std::vector> &res, + const std::string &path); + static bool LoadOCRDetResult(std::vector> *res, + const std::string &path); + static bool SaveMattingResult(const vision::MattingResult &res, + const std::string &path); + static bool LoadMattingResult(vision::MattingResult *res, + const std::string &path); + /// Calculate diff value between two basic results. + static DetectionDiff CalculateDiffStatis(const vision::DetectionResult &lhs, + const vision::DetectionResult &rhs, + const float &score_threshold = 0.3f); + static ClassifyDiff CalculateDiffStatis(const vision::ClassifyResult &lhs, + const vision::ClassifyResult &rhs); + static SegmentationDiff + CalculateDiffStatis(const vision::SegmentationResult &lhs, + const vision::SegmentationResult &rhs); + static OCRDetDiff + CalculateDiffStatis(const std::vector> &lhs, + const std::vector> &rhs); + static MattingDiff CalculateDiffStatis(const vision::MattingResult &lhs, + const vision::MattingResult &rhs); +#endif // ENABLE_VISION +#endif // ENABLE_BENCHMARK +}; + +} // namespace benchmark +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/allocate.cc b/libs/ultrainfer/ultrainfer/core/allocate.cc new file mode 100755 index 0000000000..8d1a9f680f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/allocate.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
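The string-parsing helpers declared in ResultManager above follow simple conventions that can be read off the implementation: one "key: value" entry per line for the benchmark config, inputs separated by colons, and shape dims separated by commas. A short sketch of how a benchmark driver might use them; the file name, input names, and shapes are made up, and the config map is assumed to be string-to-string as the parsing code suggests:

    #include <string>
    #include <unordered_map>
    #include "ultrainfer/benchmark/utils.h"

    void ParseBenchmarkInputs() {
      using ultrainfer::benchmark::ResultManager;

      std::unordered_map<std::string, std::string> config;
      ResultManager::LoadBenchmarkConfig("benchmark_config.txt", &config);

      // Two inputs: a 1x3x224x224 image and a 1x2 scale factor.
      auto shapes = ResultManager::GetInputShapes("1,3,224,224:1,2");
      auto names = ResultManager::GetInputNames("image:scale_factor");
      auto dtypes = ResultManager::GetInputDtypes("FP32:FP32");
    }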
+#ifdef WITH_GPU +#include +#endif + +#include "ultrainfer/core/allocate.h" + +namespace ultrainfer { + +bool FDHostAllocator::operator()(void **ptr, size_t size) const { + *ptr = malloc(size); + return *ptr != nullptr; +} + +void FDHostFree::operator()(void *ptr) const { free(ptr); } + +#ifdef WITH_GPU + +bool FDDeviceAllocator::operator()(void **ptr, size_t size) const { + return cudaMalloc(ptr, size) == cudaSuccess; +} + +void FDDeviceFree::operator()(void *ptr) const { cudaFree(ptr); } + +bool FDDeviceHostAllocator::operator()(void **ptr, size_t size) const { + return cudaMallocHost(ptr, size) == cudaSuccess; +} + +void FDDeviceHostFree::operator()(void *ptr) const { cudaFreeHost(ptr); } + +#endif + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/allocate.h b/libs/ultrainfer/ultrainfer/core/allocate.h new file mode 100755 index 0000000000..6c2650242e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/allocate.h @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include + +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +class ULTRAINFER_DECL FDHostAllocator { +public: + bool operator()(void **ptr, size_t size) const; +}; + +class ULTRAINFER_DECL FDHostFree { +public: + void operator()(void *ptr) const; +}; + +#ifdef WITH_GPU + +class ULTRAINFER_DECL FDDeviceAllocator { +public: + bool operator()(void **ptr, size_t size) const; +}; + +class ULTRAINFER_DECL FDDeviceFree { +public: + void operator()(void *ptr) const; +}; + +class ULTRAINFER_DECL FDDeviceHostAllocator { +public: + bool operator()(void **ptr, size_t size) const; +}; + +class ULTRAINFER_DECL FDDeviceHostFree { +public: + void operator()(void *ptr) const; +}; + +#endif + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/config.h.in b/libs/ultrainfer/ultrainfer/core/config.h.in new file mode 100755 index 0000000000..f51e321cb1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/config.h.in @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
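The allocator and deleter functors above are small building blocks used by FDTensor rather than a general-purpose allocator API; the host and device variants share the same call signature, and the device variants only exist in a -DWITH_GPU build. A minimal host-side sketch (the function name is illustrative):

    #include "ultrainfer/core/allocate.h"

    void HostBufferDemo() {
      ultrainfer::FDHostAllocator allocate;
      ultrainfer::FDHostFree release;
      void *buffer = nullptr;
      if (allocate(&buffer, 1024)) {  // 1 KiB of host memory
        // ... fill or read the buffer here ...
        release(buffer);
      }
    }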
+#pragma once + +#ifndef ULTRAINFER_LIB +#cmakedefine ULTRAINFER_LIB +#endif + +#ifndef LIBRARY_NAME +#cmakedefine LIBRARY_NAME @LIBRARY_NAME@ +#endif + +#ifndef PY_LIBRARY_NAME +#cmakedefine PY_LIBRARY_NAME @PY_LIBRARY_NAME@ +#endif + +#ifndef ENABLE_PADDLE2ONNX +#cmakedefine ENABLE_PADDLE2ONNX +#endif + +#ifndef ENABLE_ORT_BACKEND +#cmakedefine ENABLE_ORT_BACKEND +#endif + +#ifndef ENABLE_PADDLE_BACKEND +#cmakedefine ENABLE_PADDLE_BACKEND +#endif + +#ifndef ENABLE_POROS_BACKEND +#cmakedefine ENABLE_POROS_BACKEND +#endif + +#ifndef ENABLE_OPENVINO_BACKEND +#cmakedefine ENABLE_OPENVINO_BACKEND +#endif + +#ifndef WITH_GPU +#cmakedefine WITH_GPU +#endif + +#ifndef WITH_KUNLUNXIN +#cmakedefine WITH_KUNLUNXIN +#endif + +#ifndef WITH_DIRECTML +#cmakedefine WITH_DIRECTML +#endif + +#ifndef ENABLE_TRT_BACKEND +#cmakedefine ENABLE_TRT_BACKEND +#endif + +#ifndef ENABLE_VISION +#cmakedefine ENABLE_VISION +#endif + +#ifndef ENABLE_FLYCV +#cmakedefine ENABLE_FLYCV +#endif + +#ifndef ENABLE_TEXT +#cmakedefine ENABLE_TEXT +#endif + +#ifndef ENABLE_BENCHMARK +#cmakedefine ENABLE_BENCHMARK +#endif + +#ifndef ENABLE_HORIZON_BACKEND +#cmakedefine ENABLE_HORIZON_BACKEND +#endif + +#ifndef ENABLE_TVM_BACKEND +#cmakedefine ENABLE_TVM_BACKEND +#endif diff --git a/libs/ultrainfer/ultrainfer/core/fd_scalar.h b/libs/ultrainfer/ultrainfer/core/fd_scalar.h new file mode 100755 index 0000000000..bd152e3907 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/fd_scalar.h @@ -0,0 +1,121 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include + +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/core/float16.h" + +namespace ultrainfer { + +class Scalar { +public: + // Constructor support implicit + Scalar() : Scalar(0) {} + Scalar(double val) : dtype_(FDDataType::FP64) { // NOLINT + data_.f64 = val; + } + + Scalar(float val) : dtype_(FDDataType::FP32) { // NOLINT + data_.f32 = val; + } + + Scalar(float16 val) : dtype_(FDDataType::FP16) { // NOLINT + data_.f16 = val; + } + + Scalar(int64_t val) : dtype_(FDDataType::INT64) { // NOLINT + data_.i64 = val; + } + + Scalar(int32_t val) : dtype_(FDDataType::INT32) { // NOLINT + data_.i32 = val; + } + + Scalar(int16_t val) : dtype_(FDDataType::INT16) { // NOLINT + data_.i16 = val; + } + + Scalar(int8_t val) : dtype_(FDDataType::INT8) { // NOLINT + data_.i8 = val; + } + + Scalar(uint8_t val) : dtype_(FDDataType::UINT8) { // NOLINT + data_.ui8 = val; + } + + Scalar(bool val) : dtype_(FDDataType::BOOL) { // NOLINT + data_.b = val; + } + + // The compatible method for fliud operators, + // and it will be removed in the future. 
+ explicit Scalar(const std::string &str_value) : dtype_(FDDataType::FP64) { + if (str_value == "inf") { + data_.f64 = std::numeric_limits::infinity(); + } else if (str_value == "-inf") { + data_.f64 = -std::numeric_limits::infinity(); + } else if (str_value == "nan") { + data_.f64 = std::numeric_limits::quiet_NaN(); + } else { + data_.f64 = std::stod(str_value); + } + } + + template inline RT to() const { + switch (dtype_) { + case FDDataType::FP32: + return static_cast(data_.f32); + case FDDataType::FP64: + return static_cast(data_.f64); + case FDDataType::FP16: + return static_cast(data_.f16); + case FDDataType::INT32: + return static_cast(data_.i32); + case FDDataType::INT64: + return static_cast(data_.i64); + case FDDataType::INT16: + return static_cast(data_.i16); + case FDDataType::INT8: + return static_cast(data_.i8); + case FDDataType::UINT8: + return static_cast(data_.ui8); + case FDDataType::BOOL: + return static_cast(data_.b); + default: + FDASSERT(false, "Invalid enum scalar data type `%s`.", + Str(dtype_).c_str()); + } + } + + FDDataType dtype() const { return dtype_; } + +private: + FDDataType dtype_; + union data { + bool b; + int8_t i8; + int16_t i16; + int32_t i32; + int64_t i64; + uint8_t ui8; + float16 f16; + float f32; + double f64; + } data_; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/fd_tensor.cc b/libs/ultrainfer/ultrainfer/core/fd_tensor.cc new file mode 100755 index 0000000000..de4f368a4d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/fd_tensor.cc @@ -0,0 +1,447 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
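Scalar above is a small tagged union: each constructor records the FDDataType alongside the value, and to<RT>() converts on read, so any narrowing happens at the point of use. A short sketch; the FDTensor(const Scalar &) constructor it feeds is defined further down in fd_tensor.cc:

    #include "ultrainfer/core/fd_scalar.h"
    #include "ultrainfer/core/fd_tensor.h"

    void ScalarDemo() {
      ultrainfer::Scalar half(0.5f);         // stored as FP32
      double as_double = half.to<double>();  // converted when read back

      ultrainfer::Scalar inf("inf");         // string form, stored as FP64
      ultrainfer::FDTensor t(inf);           // one-element FP64 tensor
      (void)as_double;
      (void)t;
    }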
+#include "ultrainfer/core/fd_tensor.h" + +#include +#include + +#include "ultrainfer/core/float16.h" +#include "ultrainfer/utils/utils.h" +#ifdef WITH_GPU +#include +#endif + +namespace ultrainfer { + +void *FDTensor::MutableData() { + if (external_data_ptr != nullptr) { + return external_data_ptr; + } + return buffer_; +} + +void *FDTensor::Data() { + if (external_data_ptr != nullptr) { + return external_data_ptr; + } + return buffer_; +} + +const void *FDTensor::Data() const { + if (external_data_ptr != nullptr) { + return external_data_ptr; + } + return buffer_; +} + +void FDTensor::StopSharing() { + if (IsShared()) { + ReallocFn(Nbytes()); + CopyBuffer(buffer_, external_data_ptr, Nbytes()); + external_data_ptr = nullptr; + } +} + +const void *FDTensor::CpuData() const { + if (device == Device::GPU) { +#ifdef WITH_GPU + auto *cpu_ptr = const_cast *>(&temporary_cpu_buffer); + cpu_ptr->resize(Nbytes()); + // need to copy cuda mem to cpu first + if (external_data_ptr != nullptr) { + FDASSERT(cudaMemcpy(cpu_ptr->data(), external_data_ptr, Nbytes(), + cudaMemcpyDeviceToHost) == 0, + "[ERROR] Error occurs while copy memory from GPU to CPU"); + + } else { + FDASSERT(cudaMemcpy(cpu_ptr->data(), buffer_, Nbytes(), + cudaMemcpyDeviceToHost) == 0, + "[ERROR] Error occurs while buffer copy memory from GPU to CPU"); + } + return cpu_ptr->data(); +#else + FDASSERT(false, + "The UltraInfer didn't compile under -DWITH_GPU=ON, so this is " + "an unexpected problem happend."); +#endif + } + return Data(); +} + +void FDTensor::SetExternalData(const std::vector &new_shape, + const FDDataType &data_type, void *data_buffer, + const Device &new_device, int new_device_id) { + dtype = data_type; + shape.assign(new_shape.begin(), new_shape.end()); + external_data_ptr = data_buffer; + device = new_device; + device_id = new_device_id; +} + +void FDTensor::ExpandDim(int64_t axis) { + size_t ndim = shape.size(); + FDASSERT(axis >= 0 && axis <= ndim, + "The allowed 'axis' must be in range of (0, %lu)!", ndim); + shape.insert(shape.begin() + axis, 1); +} + +void FDTensor::Squeeze(int64_t axis) { + size_t ndim = shape.size(); + FDASSERT(axis >= 0 && axis < ndim, + "The allowed 'axis' must be in range of (0, %lu)!", ndim); + FDASSERT(shape[axis] == 1, + "The No.%ld dimension of shape should be 1, but it is %ld!", + (long)axis, (long)shape[axis]); + shape.erase(shape.begin() + axis); +} + +void FDTensor::Allocate(const std::vector &new_shape, + const FDDataType &data_type, + const std::string &tensor_name, + const Device &new_device) { + dtype = data_type; + name = tensor_name; + shape.assign(new_shape.begin(), new_shape.end()); + device = new_device; + size_t nbytes = Nbytes(); + FDASSERT(ReallocFn(nbytes), + "The UltraInfer FDTensor allocate cpu memory error"); +} + +int FDTensor::Nbytes() const { return Numel() * FDDataTypeSize(dtype); } + +int FDTensor::Numel() const { + return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); +} + +void FDTensor::Resize(size_t new_nbytes) { ReallocFn(new_nbytes); } + +void FDTensor::Resize(const std::vector &new_shape) { + int numel = Numel(); + int new_numel = std::accumulate(new_shape.begin(), new_shape.end(), 1, + std::multiplies()); + if (new_numel > numel || external_data_ptr != nullptr) { + size_t nbytes = new_numel * FDDataTypeSize(dtype); + ReallocFn(nbytes); + } + shape.assign(new_shape.begin(), new_shape.end()); + external_data_ptr = nullptr; +} + +void FDTensor::Resize(const std::vector &new_shape, + const FDDataType &data_type, + const std::string 
&tensor_name, + const Device &new_device) { + if (device != new_device) { + FreeFn(); + } + external_data_ptr = nullptr; + name = tensor_name; + device = new_device; + dtype = data_type; + int new_nbytes = std::accumulate(new_shape.begin(), new_shape.end(), 1, + std::multiplies()) * + FDDataTypeSize(data_type); + ReallocFn(new_nbytes); + shape.assign(new_shape.begin(), new_shape.end()); +} + +bool FDTensor::Reshape(const std::vector &new_shape) { + int numel = Numel(); + const int64_t unk_dim_val = -1; + const int64_t copy_dim_val = 0; + + std::vector output_shape(new_shape.size(), 0); + int64_t capacity = 1; + int unk_dim_idx = -1; + for (size_t i = 0; i < new_shape.size(); ++i) { + if (new_shape[i] == unk_dim_val) { + FDASSERT(unk_dim_idx == -1, + "Only one dimension value of 'shape' in ReshapeOp can " + "be -1. But received shape = [%s], shape[%d] is also -1.", + Str(new_shape).c_str(), i); + unk_dim_idx = i; + } else if (new_shape[i] == copy_dim_val) { + FDASSERT(i < shape.size(), + "The index of 0 in `shape` must be less than " + "the input tensor X's dimensions. " + "But received shape = [%s], shape[%d] = 0, X's shape = [%s], " + "X's dimensions = %d.", + Str(new_shape).c_str(), i, Str(shape).c_str(), shape.size()); + } else { + FDASSERT(new_shape[i] > 0, + "Each dimension value of 'shape' in ReshapeOp must not " + "be negative except one unknown dimension. " + "But received shape = [%s], shape[%d] = %d.", + Str(new_shape).c_str(), i, new_shape[i]); + } + capacity *= (new_shape[i] ? new_shape[i] : shape[i]); + output_shape[i] = (new_shape[i] ? new_shape[i] : shape[i]); + } + if (unk_dim_idx != -1) { + output_shape[unk_dim_idx] = -numel / capacity; + FDASSERT(output_shape[unk_dim_idx] * capacity == -numel, + "The 'shape' attribute in ReshapeOp is invalid. " + "The input tensor X'size must be divisible by known " + "capacity of 'shape'. " + "But received X's shape = [%s], X's size = %d, " + "'shape' is [%s], known capacity of 'shape' is %d.", + Str(shape).c_str(), numel, Str(new_shape).c_str(), capacity); + } else { + FDASSERT(numel == capacity, + "The 'shape' in ReshapeOp is invalid. " + "The input tensor X'size must be equal to the capacity of " + "'shape'. 
" + "But received X's shape = [%s], X's size = %d, 'shape' is " + "[%s], the capacity of 'shape' is %d.", + Str(shape).c_str(), numel, Str(shape).c_str(), capacity); + } + shape = output_shape; + return true; +} + +void FDTensor::PrintInfo(const std::string &prefix) const { + std::cout << prefix << ": name=" << name << ", shape="; + for (int i = 0; i < shape.size(); ++i) { + std::cout << shape[i] << " "; + } + std::cout << ", buffer_=" << buffer_ + << ", external_data_ptr=" << external_data_ptr; + double mean = 0; + double max = -99999999; + double min = 99999999; + if (dtype == FDDataType::FP32) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::FP64) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::INT8) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::UINT8) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::INT32) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::INT64) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else if (dtype == FDDataType::FP16) { + CalculateStatisInfo(CpuData(), Numel(), &mean, &max, &min); + } else { + FDASSERT(false, + "PrintInfo function doesn't support current situation, maybe you " + "need enhance this function now."); + } + std::cout << ", dtype=" << Str(dtype) << ", mean=" << mean << ", max=" << max + << ", min=" << min << std::endl; +} + +bool FDTensor::ReallocFn(size_t nbytes) { + if (device == Device::GPU) { +#ifdef WITH_GPU + size_t original_nbytes = nbytes_allocated; + if (nbytes > original_nbytes) { + if (buffer_ != nullptr) { + FDDeviceFree()(buffer_); + } + FDDeviceAllocator()(&buffer_, nbytes); + nbytes_allocated = nbytes; + } + return buffer_ != nullptr; +#else + FDASSERT(false, "The UltraInfer FDTensor allocator didn't compile under " + "-DWITH_GPU=ON," + "so this is an unexpected problem happend."); +#endif + } else { + if (is_pinned_memory) { +#ifdef WITH_GPU + size_t original_nbytes = nbytes_allocated; + if (nbytes > original_nbytes) { + if (buffer_ != nullptr) { + FDDeviceHostFree()(buffer_); + } + FDDeviceHostAllocator()(&buffer_, nbytes); + nbytes_allocated = nbytes; + } + return buffer_ != nullptr; +#else + FDASSERT(false, "The UltraInfer FDTensor allocator didn't compile under " + "-DWITH_GPU=ON," + "so this is an unexpected problem happend."); +#endif + } + buffer_ = realloc(buffer_, nbytes); + nbytes_allocated = nbytes; + return buffer_ != nullptr; + } +} + +void FDTensor::FreeFn() { + if (external_data_ptr != nullptr) + external_data_ptr = nullptr; + if (buffer_ != nullptr) { + if (device == Device::GPU) { +#ifdef WITH_GPU + FDDeviceFree()(buffer_); +#endif + } else { + if (is_pinned_memory) { +#ifdef WITH_GPU + FDDeviceHostFree()(buffer_); +#endif + } else { + FDHostFree()(buffer_); + } + } + buffer_ = nullptr; + nbytes_allocated = 0; + } +} + +// TODO(liqi): no src_device and dst_device +// should support copy from cpu or gpu to cpu or gpu +void FDTensor::CopyBuffer(void *dst, const void *src, size_t nbytes, + const Device &device, bool is_pinned_memory) { + if (device == Device::GPU) { +#ifdef WITH_GPU + FDASSERT(cudaMemcpy(dst, src, nbytes, cudaMemcpyDeviceToDevice) == 0, + "[ERROR] Error occurs while copy memory from GPU to GPU"); +#else + FDASSERT(false, + "The UltraInfer didn't compile under -DWITH_GPU=ON, so copying " + "gpu buffer is " + "an unexpected problem 
happend."); +#endif + } else { + if (is_pinned_memory) { +#ifdef WITH_GPU + FDASSERT(cudaMemcpy(dst, src, nbytes, cudaMemcpyHostToHost) == 0, + "[ERROR] Error occurs while copy memory from host to host"); +#else + FDASSERT(false, + "The UltraInfer didn't compile under -DWITH_GPU=ON, so copying " + "gpu buffer is " + "an unexpected problem happend."); +#endif + } else { + std::memcpy(dst, src, nbytes); + } + } +} + +FDTensor::FDTensor(const std::string &tensor_name) { name = tensor_name; } +FDTensor::FDTensor(const char *tensor_name) { name = tensor_name; } + +FDTensor::FDTensor(const Scalar &scalar) { + Allocate({1}, scalar.dtype()); + switch (scalar.dtype()) { + case FDDataType::BOOL: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::UINT8: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::INT8: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::INT16: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::INT32: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::INT64: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::FP16: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::FP32: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + case FDDataType::FP64: + (reinterpret_cast(Data()))[0] = scalar.to(); + break; + default: + break; + } +} + +FDTensor::FDTensor(const FDTensor &other) + : shape(other.shape), name(other.name), dtype(other.dtype), + device(other.device), device_id(other.device_id) { + // Copy buffer + if (other.buffer_ == nullptr) { + FreeFn(); + } else { + size_t nbytes = Nbytes(); + FDASSERT(ReallocFn(nbytes), + "The UltraInfer FDTensor allocate memory error"); + CopyBuffer(buffer_, other.buffer_, nbytes, device, is_pinned_memory); + } + external_data_ptr = other.external_data_ptr; +} + +FDTensor::FDTensor(FDTensor &&other) + : buffer_(other.buffer_), shape(std::move(other.shape)), + name(std::move(other.name)), dtype(other.dtype), + external_data_ptr(other.external_data_ptr), device(other.device), + device_id(other.device_id), nbytes_allocated(other.nbytes_allocated) { + other.name = ""; + // Note(zhoushunjie): Avoid double free. + other.buffer_ = nullptr; + other.external_data_ptr = nullptr; +} + +FDTensor &FDTensor::operator=(const FDTensor &other) { + if (&other != this) { + // Copy buffer + device_id = other.device_id; + if (other.buffer_ == nullptr) { + FreeFn(); + buffer_ = nullptr; + shape = other.shape; + name = other.name; + dtype = other.dtype; + device = other.device; + } else { + Resize(other.shape, other.dtype, other.name, other.device); + size_t nbytes = Nbytes(); + CopyBuffer(buffer_, other.buffer_, nbytes, device, is_pinned_memory); + } + external_data_ptr = other.external_data_ptr; + } + return *this; +} + +FDTensor &FDTensor::operator=(FDTensor &&other) { + if (&other != this) { + FreeFn(); + buffer_ = other.buffer_; + external_data_ptr = other.external_data_ptr; + + shape = std::move(other.shape); + name = std::move(other.name); + dtype = other.dtype; + device = other.device; + device_id = other.device_id; + nbytes_allocated = other.nbytes_allocated; + + other.name = ""; + // Note(zhoushunjie): Avoid double free. 
+ other.buffer_ = nullptr; + other.external_data_ptr = nullptr; + } + return *this; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/fd_tensor.h b/libs/ultrainfer/ultrainfer/core/fd_tensor.h new file mode 100755 index 0000000000..f1f2460ef7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/fd_tensor.h @@ -0,0 +1,216 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +#include "ultrainfer/core/allocate.h" +#include "ultrainfer/core/fd_scalar.h" +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/runtime/enum_variables.h" + +namespace ultrainfer { + +/*! @brief FDTensor object used to represend data matrix + * + */ +struct ULTRAINFER_DECL FDTensor { + /** \brief Set data buffer for a FDTensor, e.g + * ``` + * std::vector buffer(1 * 3 * 224 * 224, 0); + * FDTensor tensor; + * tensor.SetData({1, 3, 224, 224}, FDDataType::FLOAT, buffer.data()); + * ``` + * \param[in] tensor_shape The shape of tensor + * \param[in] data_type The data type of tensor + * \param[in] data_buffer The pointer of data buffer memory + * \param[in] copy Whether to copy memory from data_buffer to tensor, if + * false, this tensor will share memory with data_buffer, and the data is + * managed by userself \param[in] data_device The device of data_buffer, e.g + * if data_buffer is a pointer to GPU data, the device should be Device::GPU + * \param[in] data_device_id The device id of data_buffer + */ + void SetData(const std::vector &tensor_shape, + const FDDataType &data_type, void *data_buffer, + bool copy = false, const Device &data_device = Device::CPU, + int data_device_id = -1) { + SetExternalData(tensor_shape, data_type, data_buffer, data_device, + data_device_id); + if (copy) { + StopSharing(); + } + } + + /// Get data pointer of tensor + void *GetData() { return MutableData(); } + /// Get data pointer of tensor + const void *GetData() const { return Data(); } + + /// Expand the shape of tensor, it will not change the data memory, just + /// modify its attribute `shape` + void ExpandDim(int64_t axis = 0); + + /// Squeeze the shape of tensor, it will not change the data memory, just + /// modify its attribute `shape` + void Squeeze(int64_t axis = 0); + + /// Reshape the tensor, it will not change the data memory, just modify its + /// attribute `shape` + bool Reshape(const std::vector &new_shape); + + /// Total size of tensor memory buffer in bytes + int Nbytes() const; + + /// Total number of elements in tensor + int Numel() const; + + /// Get shape of tensor + std::vector Shape() const { return shape; } + + /// Get dtype of tensor + FDDataType Dtype() const { return dtype; } + + /** \brief Allocate cpu data buffer for a FDTensor, e.g + * ``` + * FDTensor tensor; + * tensor.Allocate(FDDataType::FLOAT, {1, 3, 224, 224}; + * ``` + * \param[in] data_type The data type of tensor + * \param[in] tensor_shape The shape of tensor + */ + void Allocate(const 
FDDataType &data_type,
+                const std::vector<int64_t> &data_shape) {
+    Allocate(data_shape, data_type, name);
+  }
+
+  /// Debug function, print shape, dtype, mean, max, min of tensor
+  void PrintInfo(const std::string &prefix = "Debug TensorInfo: ") const;
+
+  /// Name of the tensor; when the tensor is fed to a runtime, this needs to be defined
+  std::string name = "";
+
+  /// Whether the tensor owns its data buffer or shares the data buffer from
+  /// outside
+  bool IsShared() { return external_data_ptr != nullptr; }
+  /// If the tensor shares its data buffer from outside, `StopSharing` will
+  /// copy the data into its own buffer; otherwise, do nothing
+  void StopSharing();
+
+  // ******************************************************
+  // The following members and functions are only used inside UltraInfer and
+  // may be removed in the next version
+
+  void *buffer_ = nullptr;
+  std::vector<int64_t> shape = {0};
+  FDDataType dtype = FDDataType::INT8;
+
+  // This is used to skip the memory copy step;
+  // external_data_ptr will point to the user-allocated memory, and
+  // the user has to maintain that memory (allocate and release it)
+  void *external_data_ptr = nullptr;
+  // The internal data will be on CPU
+  // Sometimes the external data is on the GPU and we are going to use
+  // the GPU to run inference on the model,
+  // so we can skip the data transfer, which may improve the efficiency
+  Device device = Device::CPU;
+  // By default the device id of FDTensor is -1, which means this value is
+  // invalid, and FDTensor is using the same device id as Runtime.
+  int device_id = -1;
+
+  // Whether the data buffer is in pinned memory, which is allocated
+  // with cudaMallocHost()
+  bool is_pinned_memory = false;
+
+  // If the external data is not on the CPU, we use this temporary buffer
+  // to transfer the data to the CPU for the cases where we need to access
+  // the other devices' data
+  std::vector<int8_t> temporary_cpu_buffer;
+
+  // The number of bytes allocated so far.
+  // When resizing GPU memory, we will free and realloc the memory only if the
+  // required size is larger than this value.
+  size_t nbytes_allocated = 0;
+
+  // Get data buffer pointer
+  void *MutableData();
+
+  void *Data();
+
+  const void *Data() const;
+
+  // Use this to get the tensor data for processing.
+  // Since the most common scenario is processing data on the CPU,
+  // this function will return a pointer to a CPU memory
+  // buffer.
+ // If the original data is on other device, the data + // will copy to cpu store in `temporary_cpu_buffer` + const void *CpuData() const; + + // void SetDataBuffer(const std::vector& new_shape, const FDDataType& + // data_type, void* data_buffer, bool copy = false, const Device& new_device = + // Device::CPU, int new_device_id = -1); Set user memory buffer for Tensor, + // the memory is managed by the user it self, but the Tensor will share the + // memory with user So take care with the user buffer + void SetExternalData(const std::vector &new_shape, + const FDDataType &data_type, void *data_buffer, + const Device &new_device = Device::CPU, + int new_device_id = -1); + // Initialize Tensor + // Include setting attribute for tensor + // and allocate cpu memory buffer + void Allocate(const std::vector &new_shape, + const FDDataType &data_type, + const std::string &tensor_name = "", + const Device &new_device = Device::CPU); + + void Resize(size_t nbytes); + + void Resize(const std::vector &new_shape); + + void Resize(const std::vector &new_shape, + const FDDataType &data_type, const std::string &tensor_name = "", + const Device &new_device = Device::CPU); + + bool ReallocFn(size_t nbytes); + + void FreeFn(); + + FDTensor() {} + explicit FDTensor(const std::string &tensor_name); + explicit FDTensor(const char *tensor_name); + + // Deep copy + FDTensor(const FDTensor &other); + // Move constructor + FDTensor(FDTensor &&other); + + // Deep copy assignment + FDTensor &operator=(const FDTensor &other); + // Move assignment + FDTensor &operator=(FDTensor &&other); + + // Scalar to FDTensor + explicit FDTensor(const Scalar &scalar); + + ~FDTensor() { FreeFn(); } + + static void CopyBuffer(void *dst, const void *src, size_t nbytes, + const Device &device = Device::CPU, + bool is_pinned_memory = false); +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/fd_type.cc b/libs/ultrainfer/ultrainfer/core/fd_type.cc new file mode 100755 index 0000000000..3168d46715 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/fd_type.cc @@ -0,0 +1,137 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
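+
+// Illustrative usage sketch tying the FDTensor API above to the FDDataType
+// helpers implemented in this file; the tensor name and shape below are
+// arbitrary placeholders rather than values required by the library:
+//
+//   ultrainfer::FDTensor t("input");
+//   t.Allocate(ultrainfer::FDDataType::FP32, {1, 3, 224, 224});
+//   int elem_size = ultrainfer::FDDataTypeSize(t.Dtype());  // 4 for FP32
+//   // t.Nbytes() == t.Numel() * elem_size, and Str(t.Dtype()) gives a
+//   // printable name such as "FDDataType::FP32".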
+ +#include "ultrainfer/core/fd_type.h" + +#include "ultrainfer/core/float16.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +int FDDataTypeSize(const FDDataType &data_type) { + if (data_type == FDDataType::BOOL) { + return sizeof(bool); + } else if (data_type == FDDataType::INT16) { + return sizeof(int16_t); + } else if (data_type == FDDataType::INT32) { + return sizeof(int32_t); + } else if (data_type == FDDataType::INT64) { + return sizeof(int64_t); + } else if (data_type == FDDataType::FP32) { + return sizeof(float); + } else if (data_type == FDDataType::FP64) { + return sizeof(double); + } else if (data_type == FDDataType::UINT8) { + return sizeof(uint8_t); + } else if (data_type == FDDataType::INT8) { + return sizeof(int8_t); + } else if (data_type == FDDataType::FP16) { + return sizeof(float16); + } else { + FDASSERT(false, "Unexpected data type: %s", Str(data_type).c_str()); + } + return -1; +} + +std::string Str(const FDDataType &fdt) { + std::string out; + switch (fdt) { + case FDDataType::BOOL: + out = "FDDataType::BOOL"; + break; + case FDDataType::INT16: + out = "FDDataType::INT16"; + break; + case FDDataType::INT32: + out = "FDDataType::INT32"; + break; + case FDDataType::INT64: + out = "FDDataType::INT64"; + break; + case FDDataType::FP32: + out = "FDDataType::FP32"; + break; + case FDDataType::FP64: + out = "FDDataType::FP64"; + break; + case FDDataType::FP16: + out = "FDDataType::FP16"; + break; + case FDDataType::UINT8: + out = "FDDataType::UINT8"; + break; + case FDDataType::INT8: + out = "FDDataType::INT8"; + break; + default: + out = "FDDataType::UNKNOWN"; + } + return out; +} + +std::ostream &operator<<(std::ostream &out, const FDDataType &fdt) { + switch (fdt) { + case FDDataType::BOOL: + out << "FDDataType::BOOL"; + break; + case FDDataType::INT16: + out << "FDDataType::INT16"; + break; + case FDDataType::INT32: + out << "FDDataType::INT32"; + break; + case FDDataType::INT64: + out << "FDDataType::INT64"; + break; + case FDDataType::FP32: + out << "FDDataType::FP32"; + break; + case FDDataType::FP64: + out << "FDDataType::FP64"; + break; + case FDDataType::FP16: + out << "FDDataType::FP16"; + break; + case FDDataType::UINT8: + out << "FDDataType::UINT8"; + break; + case FDDataType::INT8: + out << "FDDataType::INT8"; + break; + default: + out << "FDDataType::UNKNOWN"; + } + return out; +} + +template +const FDDataType TypeToDataType::dtype = UNKNOWN1; + +template <> const FDDataType TypeToDataType::dtype = BOOL; + +template <> const FDDataType TypeToDataType::dtype = INT16; + +template <> const FDDataType TypeToDataType::dtype = INT32; + +template <> const FDDataType TypeToDataType::dtype = INT64; + +template <> const FDDataType TypeToDataType::dtype = FP32; + +template <> const FDDataType TypeToDataType::dtype = FP64; + +template <> const FDDataType TypeToDataType::dtype = UINT8; + +template <> const FDDataType TypeToDataType::dtype = INT8; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/fd_type.h b/libs/ultrainfer/ultrainfer/core/fd_type.h new file mode 100755 index 0000000000..7bedd7e329 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/fd_type.h @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include + +#include "ultrainfer/core/config.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +enum ULTRAINFER_DECL FDDataType { + BOOL, + INT16, + INT32, + INT64, + FP16, + FP32, + FP64, + UNKNOWN1, + UNKNOWN2, + UNKNOWN3, + UNKNOWN4, + UNKNOWN5, + UNKNOWN6, + UNKNOWN7, + UNKNOWN8, + UNKNOWN9, + UNKNOWN10, + UNKNOWN11, + UNKNOWN12, + UNKNOWN13, + UINT8, + INT8 +}; + +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &out, + const FDDataType &fdt); + +ULTRAINFER_DECL std::string Str(const FDDataType &fdt); + +ULTRAINFER_DECL int32_t FDDataTypeSize(const FDDataType &data_dtype); + +template struct ULTRAINFER_DECL TypeToDataType { + static const FDDataType dtype; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/core/float16.h b/libs/ultrainfer/ultrainfer/core/float16.h new file mode 100755 index 0000000000..bcacf21dba --- /dev/null +++ b/libs/ultrainfer/ultrainfer/core/float16.h @@ -0,0 +1,651 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include + +#include +#include +#include + +#if !defined(_WIN32) +#define FD_ALIGN(x) __attribute__((aligned(x))) +#else +#define FD_ALIGN(x) __declspec(align(x)) +#endif + +namespace ultrainfer { + +struct FD_ALIGN(2) float16 { +public: + uint16_t x; + + // The following defaulted special class member functions + // are added to make float16 pass the std::is_trivial test + float16() = default; + float16(const float16 &o) = default; + float16 &operator=(const float16 &o) = default; + float16(float16 &&o) = default; + float16 &operator=(float16 &&o) = default; + ~float16() = default; + + // Constructors + +#ifdef FD_WITH_NATIVE_FP16 + // __fp16 is a native half precision data type for arm cpu, + // float16_t is an alias for __fp16 + inline explicit float16(const float16_t &h) { + x = *reinterpret_cast(&h); + } +#endif + + inline explicit float16(float val) { +#if defined(FD_WITH_NATIVE_FP16) + float32x4_t tmp = vld1q_dup_f32(&val); + float16_t res = vget_lane_f16(vcvt_f16_f32(tmp), 0); + x = *reinterpret_cast(&res); + +#elif defined(__F16C__) + x = _cvtss_sh(val, 0); + +#else + // Conversion routine adapted from + // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion + Bits v, s; + v.f = val; + uint32_t sign = v.si & sigN; + v.si ^= sign; + sign >>= shiftSign; // logical shift + s.si = mulN; + s.si = s.f * v.f; // correct subnormals + v.si ^= (s.si ^ v.si) & -(minN > v.si); + v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN)); + v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN)); + v.ui >>= shift; // logical shift + v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC); + v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC); + x = v.ui | sign; + +#endif + } + + inline explicit float16(bool b) : x(b ? 0x3c00 : 0) {} + + template + inline explicit float16(const T &val) + : x(float16(static_cast(val)).x) {} + + // Assignment operators + +#ifdef FD_WITH_NATIVE_FP16 + inline float16 &operator=(const float16_t &rhs) { + x = *reinterpret_cast(&rhs); + return *this; + } +#endif + + inline float16 &operator=(bool b) { + x = b ? 
0x3c00 : 0; + return *this; + } + + inline float16 &operator=(int8_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(uint8_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(int16_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(uint16_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(int32_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(uint32_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(int64_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(uint64_t val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(float val) { + x = float16(val).x; + return *this; + } + + inline float16 &operator=(double val) { + x = float16(val).x; + return *this; + } + +// Conversion opertors +#ifdef FD_WITH_NATIVE_FP16 + HOSTDEVICE inline explicit operator float16_t() const { + return *reinterpret_cast(this); + } +#endif + + inline operator float() const { +#if defined(FD_WITH_NATIVE_FP16) + float16x4_t res = vld1_dup_f16(reinterpret_cast(this)); + return vgetq_lane_f32(vcvt_f32_f16(res), 0); + +#elif defined(__F16C__) + return _cvtsh_ss(this->x); + +#else + // Conversion routine adapted from + // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion + Bits v; + v.ui = this->x; + int32_t sign = v.si & sigC; + v.si ^= sign; + sign <<= shiftSign; + v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC); + v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC); + Bits s; + s.si = mulC; + s.f *= v.si; + int32_t mask = -(norC > v.si); + v.si <<= shift; + v.si ^= (s.si ^ v.si) & mask; + v.si |= sign; + return v.f; + +#endif + } + + inline explicit operator bool() const { return (x & 0x7fff) != 0; } + + inline explicit operator int8_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator uint8_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator int16_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator uint16_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator int32_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator uint32_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator int64_t() const { + return static_cast(static_cast(*this)); + } + + inline explicit operator uint64_t() const { + return static_cast(static_cast(*this)); + } + + inline operator double() const { + return static_cast(static_cast(*this)); + } + + inline bool operator>(const float &other) const { + return this->operator float() > other; + } + + inline bool operator>(const double &other) const { + return this->operator double() > other; + } + + inline bool operator<(const float &other) const { + return this->operator float() > other; + } + + inline bool operator<(const double &other) const { + return this->operator double() > other; + } + + template ::value, + bool>::type = true> + inline float16 &operator+=(const T &other) { + *this = float16(static_cast(*this) + other); + return *this; + } + +private: + union Bits { + float f; + int32_t si; + uint32_t ui; + }; + + static const int shift = 13; + static const int shiftSign = 16; + + static const int32_t infN = 0x7F800000; + static const int32_t maxN = 0x477FE000; // max flt16 as flt32 + static const int32_t minN = 0x38800000; // min flt16 
normal as flt32 + static const int32_t sigN = 0x80000000; // sign bit + + static constexpr int32_t infC = infN >> shift; + static constexpr int32_t nanN = (infC + 1) + << shift; // minimum flt16 nan as float32 + static constexpr int32_t maxC = maxN >> shift; + static constexpr int32_t minC = minN >> shift; + static constexpr int32_t sigC = sigN >> shiftSign; + + static const int32_t mulN = 0x52000000; // (1 << 23) / minN + static const int32_t mulC = 0x33800000; // minN / (1 << (23 - shift)) + static const int32_t subC = 0x003FF; // max flt32 subnormal downshifted + static const int32_t norC = 0x00400; // min flt32 normal downshifted + + static constexpr int32_t maxD = infC - maxC - 1; + static constexpr int32_t minD = minC - subC - 1; +}; + +// Arithmetic operators for float16 on ARMv8.2-A CPU +#if defined(FD_WITH_NATIVE_FP16) +inline float16 operator+(const float16 &a, const float16 &b) { + float16 res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fadd h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} + +inline float16 operator-(const float16 &a, const float16 &b) { + float16 res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fsub h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} + +inline float16 operator*(const float16 &a, const float16 &b) { + float16 res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fmul h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} + +inline float16 operator/(const float16 &a, const float16 &b) { + float16 res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fdiv h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} + +inline float16 operator-(const float16 &a) { + float16 res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "fneg h0, h0\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0"); + return res; +} + +inline float16 &operator+=(float16 &a, const float16 &b) { // NOLINT + a = a + b; + return a; +} + +inline float16 &operator-=(float16 &a, const float16 &b) { // NOLINT + a = a - b; + return a; +} + +inline float16 &operator*=(float16 &a, const float16 &b) { // NOLINT + a = a * b; + return a; +} + +inline float16 &operator/=(float16 &a, const float16 &b) { // NOLINT + a = a / b; + return a; +} + +inline bool operator==(const float16 &a, const float16 &b) { + uint16_t res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fcmeq h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&res) + : // clobbers + "memory", "v0", "v1"); + return (res & 0xffff) != 0; +} + +inline bool operator!=(const float16 &a, const float16 &b) { return !(a == b); } + +inline bool operator<(const float16 &a, const float16 &b) { + uint16_t 
res; + asm volatile("ld1 {v1.h}[0], [%[a_ptr]]\n" + "ld1 {v0.h}[0], [%[b_ptr]]\n" + "fcmgt h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&res) + : // clobbers + "memory", "v0", "v1"); + return (res & 0xffff) != 0; +} + +inline bool operator<=(const float16 &a, const float16 &b) { + uint16_t res; + asm volatile("ld1 {v1.h}[0], [%[a_ptr]]\n" + "ld1 {v0.h}[0], [%[b_ptr]]\n" + "fcmge h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&res) + : // clobbers + "memory", "v0", "v1"); + return (res & 0xffff) != 0; +} + +inline bool operator>(const float16 &a, const float16 &b) { + uint16_t res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fcmgt h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&res) + : // clobbers + "memory", "v0", "v1"); + return (res & 0xffff) != 0; +} + +inline bool operator>=(const float16 &a, const float16 &b) { + uint16_t res; + asm volatile("ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fcmge h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&res) + : // clobbers + "memory", "v0", "v1"); + return (res & 0xffff) != 0; +#else +inline float16 operator+(const float16 &a, const float16 &b) { + return float16(static_cast(a) + static_cast(b)); +} + +inline float16 operator-(const float16 &a, const float16 &b) { + return float16(static_cast(a) - static_cast(b)); +} + +inline float16 operator*(const float16 &a, const float16 &b) { + return float16(static_cast(a) * static_cast(b)); +} + +inline float16 operator/(const float16 &a, const float16 &b) { + return float16(static_cast(a) / static_cast(b)); +} + +inline float16 operator-(const float16 &a) { + float16 res; + res.x = a.x ^ 0x8000; + return res; +} + +inline float16 &operator+=(float16 &a, const float16 &b) { // NOLINT + a = float16(static_cast(a) + static_cast(b)); + return a; +} + +inline float16 &operator-=(float16 &a, const float16 &b) { // NOLINT + a = float16(static_cast(a) - static_cast(b)); + return a; +} + +inline float16 &operator*=(float16 &a, const float16 &b) { // NOLINT + a = float16(static_cast(a) * static_cast(b)); + return a; +} + +inline float16 &operator/=(float16 &a, const float16 &b) { // NOLINT + a = float16(static_cast(a) / static_cast(b)); + return a; +} + +inline bool operator==(const float16 &a, const float16 &b) { + return static_cast(a) == static_cast(b); +} + +inline bool operator!=(const float16 &a, const float16 &b) { + return static_cast(a) != static_cast(b); +} + +inline bool operator<(const float16 &a, const float16 &b) { + return static_cast(a) < static_cast(b); +} + +inline bool operator<=(const float16 &a, const float16 &b) { + return static_cast(a) <= static_cast(b); +} + +inline bool operator>(const float16 &a, const float16 &b) { + return static_cast(a) > static_cast(b); +} + +inline bool operator>=(const float16 &a, const float16 &b) { + return static_cast(a) >= static_cast(b); +} +#endif + + template ::value || + std::is_same::value, + bool>::type = true> + inline T &operator+=(T &a, const float16 &b) { // NOLINT + auto c = static_cast(a) + static_cast(b); + a = static_cast(c); + return a; + } + + inline double &operator+=(double &a, const float16 &b) { // NOLINT + a = a + static_cast(b); + 
return a; + } + + inline float16 raw_uint16_to_float16(uint16_t a) { + float16 res; + res.x = a; + return res; + } + + inline bool(isnan)(const float16 &a) { return (a.x & 0x7fff) > 0x7c00; } + + inline bool(isinf)(const float16 &a) { return (a.x & 0x7fff) == 0x7c00; } + + inline bool(isfinite)(const float16 &a) { + return !((isnan)(a)) && !((isinf)(a)); + } + + inline float16(abs)(const float16 &a) { + return float16(std::abs(static_cast(a))); + } + + inline std::ostream &operator<<(std::ostream &os, const float16 &a) { + os << static_cast(a); + return os; + } +} // namespace ultrainfer + +namespace std { + +// Override the std::is_pod::value for float16 +// The reason is that different compilers implemented std::is_pod based on +// different C++ standards. float16 class is a plain old data in C++11 given +// that it is both trivial and standard_layout. +// However, std::is_pod in nvcc 8.0 host c++ compiler follows C++0x and is +// more restricted in that you cannot provide any customized +// constructor in float16. Hence, we override is_pod here following C++11 +// so that .cu files can be successfully compiled by nvcc. +template <> struct is_pod { + static const bool value = is_trivial::value && + is_standard_layout::value; +}; + +template <> +struct is_floating_point + : std::integral_constant< + bool, std::is_same::type>::value> {}; +template <> struct is_signed { + static const bool value = true; +}; + +template <> struct is_unsigned { + static const bool value = false; +}; + +inline bool isnan(const ultrainfer::float16 &a) { return ultrainfer::isnan(a); } + +inline bool isinf(const ultrainfer::float16 &a) { return ultrainfer::isinf(a); } + +template <> struct numeric_limits { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 3; + static const int max_digits10 = 5; + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static ultrainfer::float16(min)() { + return ultrainfer::raw_uint16_to_float16(0x400); + } + static ultrainfer::float16 lowest() { + return ultrainfer::raw_uint16_to_float16(0xfbff); + } + static ultrainfer::float16(max)() { + return ultrainfer::raw_uint16_to_float16(0x7bff); + } + static ultrainfer::float16 epsilon() { + return ultrainfer::raw_uint16_to_float16(0x0800); + } + static ultrainfer::float16 round_error() { return ultrainfer::float16(0.5); } + static ultrainfer::float16 infinity() { + return ultrainfer::raw_uint16_to_float16(0x7c00); + } + static ultrainfer::float16 quiet_NaN() { + return ultrainfer::raw_uint16_to_float16(0x7e00); + } + static ultrainfer::float16 signaling_NaN() { + return ultrainfer::raw_uint16_to_float16(0x7e00); + } + static ultrainfer::float16 denorm_min() { + return ultrainfer::raw_uint16_to_float16(0x1); + } +}; + +inline 
ultrainfer::float16 abs(const ultrainfer::float16 &a) { + return ultrainfer::abs(a); +} + +} // namespace std diff --git a/libs/ultrainfer/ultrainfer/function/cast.cc b/libs/ultrainfer/ultrainfer/function/cast.cc new file mode 100755 index 0000000000..db424b5d81 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/cast.cc @@ -0,0 +1,47 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/cast.h" +#include + +namespace ultrainfer { +namespace function { + +template struct CastOpTransformFunctor { + OutT operator()(InT in) const { return static_cast(in); } +}; + +template +void CastKernel(const FDTensor &x, FDTensor *out, FDDataType output_dtype) { + + FD_VISIT_ALL_TYPES(output_dtype, "CastOpTransformFunctor", ([&] { + auto *in_begin = reinterpret_cast(x.Data()); + auto *in_end = in_begin + x.Numel(); + FDTensor out_tmp; + out_tmp.Allocate(x.Shape(), output_dtype); + auto *out_begin = + reinterpret_cast(out_tmp.Data()); + std::transform(in_begin, in_end, out_begin, + CastOpTransformFunctor()); + *out = std::move(out_tmp); + })); +} + +void Cast(const FDTensor &x, FDTensor *out, FDDataType output_dtype) { + FD_VISIT_ALL_TYPES(x.dtype, "CastKernel", + ([&] { CastKernel(x, out, output_dtype); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/cast.h b/libs/ultrainfer/ultrainfer/function/cast.h new file mode 100755 index 0000000000..2a90c67088 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/cast.h @@ -0,0 +1,31 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Cast x to output data type element-wise. Only for float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. + @param output_dtype The type of output tensor. +*/ +ULTRAINFER_DECL void Cast(const FDTensor &x, FDTensor *out, + FDDataType output_dtype); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/clip.cc b/libs/ultrainfer/ultrainfer/function/clip.cc new file mode 100755 index 0000000000..95308fb7d2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/clip.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/clip.h" +#include + +namespace ultrainfer { +namespace function { + +template class ClipFunctor { +public: + explicit ClipFunctor(const T min, const T max) : min_(min), max_(max) {} + T operator()(const T x) const { + return x < min_ ? min_ : x > max_ ? max_ : x; + } + +private: + T min_; + T max_; +}; + +template +void ClipKernel(const FDTensor &x, double min, double max, FDTensor *out) { + T max_ = static_cast(max); + T min_ = static_cast(min); + + FDASSERT(min_ < max_, + "max should be greater than or equal to min. But received min = %f, " + "max = %f", + static_cast(min_), static_cast(max_)); + FDTensor tmp; + tmp.Allocate(x.Shape(), x.Dtype()); + const T *x_data = reinterpret_cast(x.Data()); + + int64_t numel = x.Numel(); + T *out_data = reinterpret_cast(tmp.Data()); + + std::transform(x_data, x_data + numel, out_data, ClipFunctor(min_, max_)); + *out = std::move(tmp); +} + +void Clip(const FDTensor &x, double min, double max, FDTensor *out) { + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ClipKernel", + ([&] { ClipKernel(x, min, max, out); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/clip.h b/libs/ultrainfer/ultrainfer/function/clip.h new file mode 100755 index 0000000000..34d6f873f7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/clip.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** This operator clip all elements in input into the range [ min, max ]. + Support float32, float64, int32, int64 + @param x The input tensor. + @param min The lower bound + @param max The uppper bound + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Clip(const FDTensor &x, double min, double max, + FDTensor *out); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/concat.cc b/libs/ultrainfer/ultrainfer/function/concat.cc new file mode 100755 index 0000000000..1d70ea2f42 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/concat.cc @@ -0,0 +1,118 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/concat.h" + +#include "ultrainfer/utils/utils.h" +#include +#include +#include +#include + +namespace ultrainfer { +namespace function { + +std::vector +ComputeAndCheckConcatOutputShape(const std::vector &input, int axis) { + const size_t n = input.size(); + auto out_dims = input[0].shape; + size_t in_zero_dims_size = out_dims.size(); + for (size_t i = 1; i < n; ++i) { + FDASSERT(input[i].shape.size() == out_dims.size(), + "The shape of input[0] and input[%d] is expected to be equal. But " + "received input[0]'s shape = %s, input[%d]'s shape = %s.", + i, Str(out_dims).c_str(), i, Str(input[i].shape).c_str()); + for (size_t j = 0; j < in_zero_dims_size; j++) { + if (j == axis) { + out_dims[axis] += input[i].shape[axis]; + } else { + FDASSERT( + input[0].shape[j] == input[i].shape[j], + "The %d-th dimension of input[0] and input[%d] is expected to be " + "equal." + "But received input[0]'s shape = %s, input[%d]'s shape = %s.", + j, i, Str(input[0].shape).c_str(), i, Str(input[i].shape).c_str()); + } + } + } + return out_dims; +} + +template struct ConcatFunctor { + void operator()(const std::vector &input, int axis, + FDTensor *output) { + size_t num = input.size(); + + int64_t rows = 1; + auto dim_0 = input[0].shape; + for (int i = 0; i < axis; ++i) { + rows *= dim_0[i]; + } + int64_t out_rows = rows, out_cols = 0; + + std::vector input_cols(num); + for (size_t i = 0; i < num; ++i) { + int64_t t_cols = input[i].Numel() / rows; + out_cols += t_cols; + input_cols[i] = t_cols; + } + + // computation + T *output_data = reinterpret_cast(output->Data()); + int64_t col_idx = 0; + for (size_t j = 0; j < num; ++j) { + int64_t col_len = input_cols[j]; + const T *input_data = reinterpret_cast(input[j].Data()); + for (int64_t k = 0; k < out_rows; ++k) { + FDTensor::CopyBuffer(output_data + k * out_cols + col_idx, + input_data + k * col_len, sizeof(T) * col_len, + input[j].device, input[j].is_pinned_memory); + } + col_idx += col_len; + } + } +}; + +template +void ConcatKernel(const std::vector &input, FDTensor *output, + int axis) { + auto output_shape = ComputeAndCheckConcatOutputShape(input, axis); + FDTensor output_tmp; + output_tmp.Resize(output_shape, TypeToDataType::dtype, output->name, + input[0].device); + + ConcatFunctor functor; + functor(input, axis, &output_tmp); + *output = std::move(output_tmp); +} + +void Concat(const std::vector &x, FDTensor *out, int axis) { + FDASSERT(x.size() > 0, + "The number of FDTensor array should be larger than 0, but the size " + "of input is %d", + x.size()); + int64_t rank = x[0].shape.size(); + FDASSERT(axis >= -rank && axis < rank, + "The axis is expected to be in range of [%d, %d), but got %d", -rank, + rank, axis); + if (axis < 0) { + axis += rank; + } + + FD_VISIT_ALL_TYPES(x[0].dtype, "Concat", + ([&] { ConcatKernel(x, out, axis); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/concat.h b/libs/ultrainfer/ultrainfer/function/concat.h new file mode 100755 index 0000000000..862b9784e9 --- /dev/null +++ 
b/libs/ultrainfer/ultrainfer/function/concat.h @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Excute the concatenate operation for input FDTensor along given axis. + @param x The input tensor. + @param out The output tensor which stores the result. + @param axis Axis which will be concatenated. +*/ + +ULTRAINFER_DECL void Concat(const std::vector &x, FDTensor *out, + int axis = 0); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/cuda_cast.cu b/libs/ultrainfer/ultrainfer/function/cuda_cast.cu new file mode 100755 index 0000000000..c97e7ea3a3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/cuda_cast.cu @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifdef WITH_GPU +#include "ultrainfer/function/cuda_cast.h" +namespace ultrainfer { +namespace function { +template +__global__ void CudaCastKernel(const T_IN *in, T_OUT *out, int edge) { + int position = blockDim.x * blockIdx.x + threadIdx.x; + if (position >= edge) + return; + out[position] = (T_OUT)in[position]; +} + +void CudaCast(const FDTensor &in, FDTensor *out, cudaStream_t stream) { + int jobs = in.Numel(); + int threads = 256; + int blocks = ceil(jobs / (float)threads); + if (in.dtype == FDDataType::INT64 && out->dtype == FDDataType::INT32) { + CudaCastKernel<<>>( + reinterpret_cast(const_cast(in.Data())), + reinterpret_cast(out->MutableData()), jobs); + } else if (in.dtype == FDDataType::INT32 && out->dtype == FDDataType::INT64) { + CudaCastKernel<<>>( + reinterpret_cast(const_cast(in.Data())), + reinterpret_cast(out->MutableData()), jobs); + } else { + FDASSERT(false, "CudaCast only support input INT64, output INT32."); + } +} + +} // namespace function +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/function/cuda_cast.h b/libs/ultrainfer/ultrainfer/function/cuda_cast.h new file mode 100755 index 0000000000..594be05a0f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/cuda_cast.h @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { +/** Cast the type of the data in GPU buffer. + @param in The input tensor. + @param out The output tensor + @param stream CUDA stream +*/ +ULTRAINFER_DECL void CudaCast(const FDTensor &in, FDTensor *out, + cudaStream_t stream); +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/cumprod.cc b/libs/ultrainfer/ultrainfer/function/cumprod.cc new file mode 100755 index 0000000000..47eb89cc9a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/cumprod.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/cumprod.h" + +namespace ultrainfer { +namespace function { + +void GetCumprodDimInfo(const std::vector &dim, int cumprod_dim, + size_t *outer_dim, size_t *mid_dim, size_t *inner_dim) { + int dim_size = dim.size(); + FDASSERT(cumprod_dim >= -dim_size, + "The input dim of CumprodOp should be larger than the opposite " + "rank of input x which is %d. But received dim = %d", + -dim_size, cumprod_dim); + FDASSERT(cumprod_dim < dim_size, + "The input dim of CumprodOp should be smaller than the " + "rank of input x which is %d. 
But received dim = %d",
+           dim_size, cumprod_dim);
+  if (cumprod_dim < 0)
+    cumprod_dim += dim_size;
+
+  *outer_dim = 1;
+  for (int i = 0; i < cumprod_dim; ++i) {
+    *outer_dim *= dim[i];
+  }
+  *mid_dim = dim[cumprod_dim];
+  *inner_dim = 1;
+  for (int i = cumprod_dim + 1; i < dim_size; ++i) {
+    *inner_dim *= dim[i];
+  }
+}
+
+template <typename T>
+void CumprodKernel(const FDTensor &x, FDTensor *out, int axis) {
+  auto *x_data = reinterpret_cast<const T *>(x.Data());
+  auto shape = x.Shape();
+
+  size_t outer_dim = 1;
+  size_t mid_dim = 1;
+  size_t inner_dim = 1;
+  GetCumprodDimInfo(shape, axis, &outer_dim, &mid_dim, &inner_dim);
+
+  out->Allocate(x.Shape(), x.Dtype());
+  auto *out_data = reinterpret_cast<T *>(out->Data());
+
+  for (size_t i = 0; i < outer_dim; i++) {
+    for (size_t j = 0; j < mid_dim; j++) {
+      for (size_t k = 0; k < inner_dim; k++) {
+        size_t pos = i * mid_dim * inner_dim + j * inner_dim + k;
+        if (j == 0) {
+          out_data[pos] = x_data[pos];
+        } else {
+          out_data[pos] = out_data[pos - inner_dim] * x_data[pos];
+        }
+      }
+    }
+  }
+}
+
+void Cumprod(const FDTensor &x, FDTensor *out, int axis) {
+  FD_VISIT_INT_FLOAT_TYPES(x.dtype, "CumprodKernel",
+                           ([&] { CumprodKernel<data_t>(x, out, axis); }));
+}
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/cumprod.h b/libs/ultrainfer/ultrainfer/function/cumprod.h
new file mode 100755
index 0000000000..ec5ec92845
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/cumprod.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/core/fd_tensor.h"
+
+namespace ultrainfer {
+namespace function {
+
+/** Execute the cumulative product operation on the input FDTensor along the given axis.
+  @param x The input tensor.
+  @param out The output tensor which stores the result.
+  @param axis The axis along which the cumulative product is computed.
+*/
+
+ULTRAINFER_DECL void Cumprod(const FDTensor &x, FDTensor *out, int axis = 0);
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/eigen.cc b/libs/ultrainfer/ultrainfer/function/eigen.cc
new file mode 100755
index 0000000000..b60fb125eb
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/eigen.cc
@@ -0,0 +1,33 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
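+
+// A minimal sketch of how these Eigen wrappers are typically driven by the
+// function kernels; it assumes `x` and `out` are FP32 FDTensors that have
+// already been allocated with matching shapes:
+//
+//   auto x_vec = ultrainfer::function::EigenVector<float>::Flatten(x);
+//   auto out_vec = ultrainfer::function::EigenVector<float>::Flatten(out);
+//   const auto &dev =
+//       *ultrainfer::function::EigenDeviceWrapper::GetInstance()->GetDevice();
+//   out_vec.device(dev) = x_vec * x_vec;  // evaluate an Eigen expression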
+ +#include "ultrainfer/function/eigen.h" + +namespace ultrainfer { +namespace function { +std::shared_ptr EigenDeviceWrapper::instance_ = nullptr; + +std::shared_ptr EigenDeviceWrapper::GetInstance() { + if (instance_ == nullptr) { + instance_ = std::make_shared(); + } + return instance_; +} + +const Eigen::DefaultDevice *EigenDeviceWrapper::GetDevice() const { + return &device_; +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/eigen.h b/libs/ultrainfer/ultrainfer/function/eigen.h new file mode 100755 index 0000000000..0562a26d02 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/eigen.h @@ -0,0 +1,139 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/axis_utils.h" +#include "unsupported/Eigen/CXX11/Tensor" +#include +#include +#include + +namespace ultrainfer { +namespace function { +// EigenDim converts shape into Eigen::DSizes. +template struct EigenDim { + using Type = Eigen::DSizes; + + static Type From(const std::vector &dims) { + Type ret; + for (int64_t d = 0; d < dims.size(); d++) { + ret[d] = dims[d]; + } + return ret; + } +}; + +// Interpret FDTensor as EigenTensor and EigenConstTensor. +template +struct EigenTensor { + using Type = Eigen::TensorMap>; + + using ConstType = + Eigen::TensorMap>; + + static Type From(FDTensor &tensor, + const std::vector &dims) { // NOLINT + return Type(reinterpret_cast(tensor.Data()), EigenDim::From(dims)); + } + + static Type From(FDTensor &tensor) { // NOLINT + return From(tensor, tensor.shape); + } // NOLINT + + static ConstType From(const FDTensor &tensor, + const std::vector &dims) { + return ConstType(reinterpret_cast(tensor.Data()), + EigenDim::From(dims)); + } + + static ConstType From(const FDTensor &tensor) { + return From(tensor, tensor.shape); + } +}; + +template +struct EigenScalar { + // Scalar tensor (implemented as a rank-0 tensor) of scalar type T. + using Type = Eigen::TensorMap< + Eigen::TensorFixedSize, MajorType, IndexType>>; + using ConstType = Eigen::TensorMap< + Eigen::TensorFixedSize, MajorType, IndexType>>; + + static Type From(FDTensor &tensor) { + return Type(reinterpret_cast(tensor.Data())); + } // NOLINT + + static ConstType From(const FDTensor &tensor) { + return ConstType(reinterpret_cast(tensor.Data())); + } +}; + +template +struct EigenVector : public EigenTensor { + // Flatten reshapes a Tensor into an EigenVector. 
+ static typename EigenVector::Type Flatten(FDTensor &tensor) { // NOLINT + return EigenVector::From(tensor, {tensor.Numel()}); + } + + static typename EigenVector::ConstType + Flatten(const FDTensor &tensor) { // NOLINT + return EigenVector::From(tensor, {tensor.Numel()}); + } +}; + +template +struct EigenMatrix : public EigenTensor { + static typename EigenMatrix::Type Reshape(FDTensor &tensor, // NOLINT + int num_col_dims) { + int rank = tensor.shape.size(); + FDASSERT((num_col_dims > 0 && num_col_dims < rank), + "Input dimension number(num_col_dims) must be between 0 and %d, " + "but received number is %d.", + rank, num_col_dims); + const int n = SizeToAxis(num_col_dims, tensor.shape); + const int d = SizeFromAxis(num_col_dims, tensor.shape); + return EigenMatrix::From(tensor, {n, d}); + } + + static typename EigenMatrix::ConstType Reshape(const FDTensor &tensor, + int num_col_dims) { + int rank = tensor.shape.size(); + FDASSERT((num_col_dims > 0 && num_col_dims < rank), + "Input dimension number(num_col_dims) must be between 0 and %d, " + "but received number is %d.", + rank, num_col_dims); + const int n = SizeToAxis(num_col_dims, tensor.shape); + const int d = SizeFromAxis(num_col_dims, tensor.shape); + return EigenMatrix::From(tensor, {n, d}); + } +}; + +class EigenDeviceWrapper { +public: + static std::shared_ptr GetInstance(); + const Eigen::DefaultDevice *GetDevice() const; + +private: + Eigen::DefaultDevice device_; + static std::shared_ptr instance_; +}; + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/elementwise.cc b/libs/ultrainfer/ultrainfer/function/elementwise.cc new file mode 100755 index 0000000000..7e8b4d65cc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/elementwise.cc @@ -0,0 +1,110 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/function/elementwise.h" +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/function/elementwise_base.h" +#include "ultrainfer/function/elementwise_functor.h" +#include "ultrainfer/utils/utils.h" +#include + +namespace ultrainfer { +namespace function { + +DEFINE_ELEMENTWISE_OP(Add); +DEFINE_ELEMENTWISE_OP(Multiply); +DEFINE_ELEMENTWISE_OP(Subtract); +DEFINE_ELEMENTWISE_OP(Divide); + +void Add(const FDTensor &x, const FDTensor &y, FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "AddRawKernel", + ([&] { AddRawKernel()(x, y, -1, out); })); +} + +void Subtract(const FDTensor &x, const FDTensor &y, FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "SubtractRawKernel", + ([&] { SubtractRawKernel()(x, y, -1, out); })); +} + +void Multiply(const FDTensor &x, const FDTensor &y, FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "MultiplyRawKernel", + ([&] { MultiplyRawKernel()(x, y, -1, out); })); +} + +void Divide(const FDTensor &x, const FDTensor &y, FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "DivideRawKernel", + ([&] { DivideRawKernel()(x, y, -1, out); })); +} + +template struct MaximumRawKernel { + void operator()(const FDTensor &x, const FDTensor &y, int axis, + FDTensor *out) { + ElementwiseCompute, T>(x, y, axis, MaximumFunctor(), + out); + } +}; + +void Maximum(const FDTensor &x, const FDTensor &y, FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "MaximumRawKernel", + ([&] { MaximumRawKernel()(x, y, -1, out); })); +} + +} // namespace function + +FDTensor operator+(const FDTensor &x, const FDTensor &y) { + FDTensor out; + function::Add(x, y, &out); + return out; +} + +FDTensor operator-(const FDTensor &x, const FDTensor &y) { + FDTensor out; + function::Subtract(x, y, &out); + return out; +} + +FDTensor operator*(const FDTensor &x, const FDTensor &y) { + FDTensor out; + function::Multiply(x, y, &out); + return out; +} + +FDTensor operator/(const FDTensor &x, const FDTensor &y) { + FDTensor out; + function::Divide(x, y, &out); + return out; +} + +#define INSTANTIATE_OPERATOR(operation_type) \ + template FDTensor operator operation_type(const FDTensor &x, bool y); \ + template FDTensor operator operation_type(const FDTensor &x, uint8_t y); \ + template FDTensor operator operation_type(const FDTensor &x, int16_t y); \ + template FDTensor operator operation_type(const FDTensor &x, int y); \ + template FDTensor operator operation_type(const FDTensor &x, int64_t y); \ + template FDTensor operator operation_type(const FDTensor &x, float y); \ + template FDTensor operator operation_type(const FDTensor &x, double y); \ + template FDTensor operator operation_type(bool x, const FDTensor &y); \ + template FDTensor operator operation_type(uint8_t x, const FDTensor &y); \ + template FDTensor operator operation_type(int16_t x, const FDTensor &y); \ + template FDTensor operator operation_type(int x, const FDTensor &y); \ + template FDTensor operator operation_type(int64_t x, const FDTensor &y); \ + template FDTensor operator operation_type(float x, const FDTensor &y); \ + template FDTensor operator operation_type(double x, const FDTensor &y) + +INSTANTIATE_OPERATOR(+); +INSTANTIATE_OPERATOR(-); +INSTANTIATE_OPERATOR(*); +INSTANTIATE_OPERATOR(/); + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/elementwise.h b/libs/ultrainfer/ultrainfer/function/elementwise.h new file mode 100755 index 0000000000..bc6aeb93a2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/elementwise.h @@ -0,0 +1,105 @@ +// Copyright (c) 2022 PaddlePaddle Authors. 
All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/core/fd_scalar.h"
+#include "ultrainfer/core/fd_tensor.h"
+
+namespace ultrainfer {
+
+namespace function {
+
+/** Execute the add operation for input FDTensors. *out = x + y.
+  @param x The input tensor.
+  @param y The input tensor.
+  @param out The output tensor which stores the result.
+*/
+ULTRAINFER_DECL void Add(const FDTensor &x, const FDTensor &y, FDTensor *out);
+
+/** Execute the subtract operation for input FDTensors. *out = x - y.
+  @param x The input tensor.
+  @param y The input tensor.
+  @param out The output tensor which stores the result.
+*/
+ULTRAINFER_DECL void Subtract(const FDTensor &x, const FDTensor &y,
+                              FDTensor *out);
+
+/** Execute the multiply operation for input FDTensors. *out = x * y.
+  @param x The input tensor.
+  @param y The input tensor.
+  @param out The output tensor which stores the result.
+*/
+ULTRAINFER_DECL void Multiply(const FDTensor &x, const FDTensor &y,
+                              FDTensor *out);
+
+/** Execute the divide operation for input FDTensors. *out = x / y.
+  @param x The input tensor.
+  @param y The input tensor.
+  @param out The output tensor which stores the result.
+*/
+ULTRAINFER_DECL void Divide(const FDTensor &x, const FDTensor &y,
+                            FDTensor *out);
+
+/** Execute the maximum operation for input FDTensors. *out = max(x, y).
+  @param x The input tensor.
+  @param y The input tensor.
+  @param out The output tensor which stores the result.
+*/
+ULTRAINFER_DECL void Maximum(const FDTensor &x, const FDTensor &y,
+                             FDTensor *out);
+
+} // namespace function
+
+ULTRAINFER_DECL FDTensor operator+(const FDTensor &x, const FDTensor &y);
+
+template <typename T> FDTensor operator+(const FDTensor &x, T y) {
+  return x + FDTensor(Scalar(y));
+}
+
+template <typename T> FDTensor operator+(T x, const FDTensor &y) {
+  return FDTensor(Scalar(x)) + y;
+}
+
+ULTRAINFER_DECL FDTensor operator-(const FDTensor &x, const FDTensor &y);
+
+template <typename T> FDTensor operator-(const FDTensor &x, T y) {
+  return x - FDTensor(Scalar(y));
+}
+
+template <typename T> FDTensor operator-(T x, const FDTensor &y) {
+  return FDTensor(Scalar(x)) - y;
+}
+
+ULTRAINFER_DECL FDTensor operator*(const FDTensor &x, const FDTensor &y);
+
+template <typename T> FDTensor operator*(const FDTensor &x, T y) {
+  return x * FDTensor(Scalar(y));
+}
+
+template <typename T> FDTensor operator*(T x, const FDTensor &y) {
+  return FDTensor(Scalar(x)) * y;
+}
+
+ULTRAINFER_DECL FDTensor operator/(const FDTensor &x, const FDTensor &y);
+
+template <typename T> FDTensor operator/(const FDTensor &x, T y) {
+  return x / FDTensor(Scalar(y));
+}
+
+template <typename T> FDTensor operator/(T x, const FDTensor &y) {
+  return FDTensor(Scalar(x)) / y;
+}
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/elementwise_base.h b/libs/ultrainfer/ultrainfer/function/elementwise_base.h
new file mode 100755
index 0000000000..b03172d3bf
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/elementwise_base.h
@@ -0,0 +1,265 @@
+// Copyright (c) 2022 PaddlePaddle Authors.
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/function/eigen.h" + +namespace ultrainfer { +namespace function { + +#define DEFINE_ELEMENTWISE_OP(name) \ + template struct name##RawKernel { \ + void operator()(const FDTensor &x, const FDTensor &y, int axis, \ + FDTensor *out) { \ + if (x.Shape() == y.Shape()) { \ + SameDimsElementwiseCompute>()(x, y, out); \ + } else { \ + auto x_dims = x.Shape(); \ + auto y_dims = y.Shape(); \ + if (x_dims.size() >= y_dims.size()) { \ + ElementwiseCompute, T>(x, y, axis, \ + name##Functor(), out); \ + } else { \ + ElementwiseCompute, T>( \ + x, y, axis, Inverse##name##Functor(), out); \ + } \ + } \ + } \ + } + +inline void GetMidDims(const std::vector &x_dims, + const std::vector &y_dims, const int axis, + int *pre, int *n, int *post, + int *is_run_common_broadcast) { + *pre = 1; + *n = 1; + *post = 1; + *is_run_common_broadcast = 0; + for (int i = 0; i < axis; ++i) { + (*pre) *= x_dims[i]; + } + for (int i = 0; i < y_dims.size(); ++i) { + if (x_dims[i + axis] != y_dims[i]) { + FDASSERT(y_dims[i] == 1 || x_dims[i + axis] == 1, + "Broadcast dimension mismatch. Operands " + "could not be broadcast together with the shape of " + "X = [%s] and the shape of Y = [%s]. 
Received [%d] " + "in X is not equal to [%d] in Y.", + Str(x_dims).c_str(), Str(y_dims).c_str(), x_dims[i + axis], + y_dims[i]); + *is_run_common_broadcast = 1; + return; + } + (*n) *= y_dims[i]; + } + for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) { + (*post) *= x_dims[i]; + } +} + +inline std::vector +TrimTrailingSingularDims(const std::vector &dims) { + // Remove trailing dimensions of size 1 for y + auto actual_dims_size = dims.size(); + for (; actual_dims_size != 0; --actual_dims_size) { + if (dims[actual_dims_size - 1] != 1) + break; + } + if (actual_dims_size == dims.size()) + return dims; + std::vector trim_dims; + trim_dims.resize(actual_dims_size); + for (int i = 0; i < actual_dims_size; ++i) { + trim_dims[i] = dims[i]; + } + return trim_dims; +} + +inline int GetElementwiseIndex(const int64_t *x_dims_array, const int max_dim, + const int64_t *index_array) { + int index_ = 0; + for (int i = 0; i < max_dim; i++) { + if (x_dims_array[i] > 1) { + index_ = index_ * x_dims_array[i] + index_array[i]; + } + } + return index_; +} + +inline void UpdateElementwiseIndexArray(const int64_t *out_dims_array, + const int max_dim, + int64_t *index_array) { + for (int i = max_dim - 1; i >= 0; --i) { + ++index_array[i]; + if (index_array[i] >= out_dims_array[i]) { + index_array[i] -= out_dims_array[i]; + } else { + break; + } + } +} + +inline void GetBroadcastDimsArrays(const std::vector &x_dims, + const std::vector &y_dims, + int64_t *x_dims_array, int64_t *y_dims_array, + int64_t *out_dims_array, const int max_dim, + const int axis) { + FDASSERT(axis >= 0, + "Axis should be great than or equal to 0, but received axis is %d.", + axis); + FDASSERT(axis < max_dim, + "Axis should be less than %d, but received axis is %d.", max_dim, + axis); + if (x_dims.size() > y_dims.size()) { + std::fill(y_dims_array, y_dims_array + axis, 1); + if (axis + y_dims.size() < max_dim) { + std::fill(y_dims_array + axis + y_dims.size(), y_dims_array + max_dim, 1); + } + std::copy(x_dims.data(), x_dims.data() + x_dims.size(), x_dims_array); + std::copy(y_dims.data(), y_dims.data() + y_dims.size(), + y_dims_array + axis); + } else { + std::fill(x_dims_array, x_dims_array + axis, 1); + if (axis + x_dims.size() < max_dim) { + std::fill(x_dims_array + axis + x_dims.size(), x_dims_array + max_dim, 1); + } + std::copy(x_dims.data(), x_dims.data() + x_dims.size(), + x_dims_array + axis); + std::copy(y_dims.data(), y_dims.data() + y_dims.size(), y_dims_array); + } + + for (int i = 0; i < max_dim; i++) { + FDASSERT(x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 || + y_dims_array[i] <= 1, + "Broadcast dimension mismatch. Operands " + "could not be broadcast together with the shape of " + "X = [%s] and the shape of Y = [%s]. 
Received [%d] " + "in X is not equal to [%d] in Y.", + Str(x_dims).c_str(), Str(y_dims).c_str(), x_dims[i + axis], + y_dims[i]); + if ((x_dims_array[i] > 1 || y_dims_array[i] > 1) || + (x_dims_array[i] == 1 && y_dims_array[i] == 1)) { + out_dims_array[i] = (std::max)(x_dims_array[i], y_dims_array[i]); + } else { + out_dims_array[i] = -1; + } + } +} + +template +void CommonForwardBroadcastCPU(const FDTensor &x, const FDTensor &y, + FDTensor *z, int64_t *x_dims_array, + int64_t *y_dims_array, int64_t *out_dims_array, + int max_dim, Functor func, + const bool is_xsize_larger = true) { + std::vector index_array(max_dim, 0); + const T *x_data = reinterpret_cast(x.Data()); + const T *y_data = reinterpret_cast(y.Data()); + FDASSERT(x_data != nullptr, "The input X should not be empty."); + FDASSERT(y_data != nullptr, "The input X should not be empty."); + OutType *out_data = reinterpret_cast(z->Data()); + + const int out_size = std::accumulate(out_dims_array, out_dims_array + max_dim, + 1, std::multiplies()); + int x_index, y_index; + for (int out_index = 0; out_index < out_size; ++out_index) { + x_index = GetElementwiseIndex(x_dims_array, max_dim, index_array.data()); + y_index = GetElementwiseIndex(y_dims_array, max_dim, index_array.data()); + if (is_xsize_larger) { + out_data[out_index] = func(x_data[x_index], y_data[y_index]); + } else { + out_data[out_index] = func(y_data[y_index], x_data[x_index]); + } + + UpdateElementwiseIndexArray(out_dims_array, max_dim, index_array.data()); + } +} + +template +void CommonElementwiseBroadcastForward(const FDTensor &x, const FDTensor &y, + FDTensor *z, + const std::vector &x_dims, + const std::vector &y_dims, + Functor func, int axis, + const bool is_xsize_larger = true) { + int x_dims_size = x_dims.size(); + int y_dims_size = y_dims.size(); + int max_dim = (std::max)(x_dims_size, y_dims_size); + axis = (axis == -1 ? std::abs(x_dims_size - y_dims_size) : axis); + FDASSERT(axis >= 0, + "Axis should be great than or equal to 0, but received axis is %d.", + axis); + FDASSERT(axis < max_dim, + "Axis should be less than %d, but received axis is %d.", max_dim, + axis); + std::vector x_dims_array(max_dim); + std::vector y_dims_array(max_dim); + std::vector out_dims_array(max_dim); + GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(), + y_dims_array.data(), out_dims_array.data(), max_dim, + axis); + FDTensor tmp; + tmp.Allocate(out_dims_array, TypeToDataType::dtype); + CommonForwardBroadcastCPU( + x, y, &tmp, x_dims_array.data(), y_dims_array.data(), + out_dims_array.data(), max_dim, func, is_xsize_larger); + *z = std::move(tmp); +} + +template +void ElementwiseCompute(const FDTensor &x, const FDTensor &y, int axis, + Functor func, FDTensor *z) { + auto x_dims = x.Shape(); + auto y_dims = y.Shape(); + bool is_xsize_larger = true; + int max_dim = x_dims.size(); + if (x_dims.size() < y_dims.size()) { + is_xsize_larger = false; + max_dim = y_dims.size(); + } + + int diff_size = x_dims.size() - y_dims.size(); + axis = (axis == -1 ? std::abs(diff_size) : axis); + FDASSERT(axis >= 0, + "Axis should be great than or equal to 0, but received axis is %d.", + axis); + FDASSERT(axis < max_dim, + "Axis should be less than %d, but received axis is %d.", max_dim, + axis); + + int pre, n, post, is_run_common_broadcast, axis_trim = 0; + if (is_xsize_larger) { + auto y_dims_trimed = TrimTrailingSingularDims(y_dims); + axis_trim = (y_dims_trimed.size() == 0) ? 
x_dims.size() : axis; + GetMidDims(x_dims, y_dims_trimed, axis_trim, &pre, &n, &post, + &is_run_common_broadcast); + } else { + auto x_dims_trimed = TrimTrailingSingularDims(x_dims); + axis_trim = (x_dims_trimed.size() == 0) ? y_dims.size() : axis; + GetMidDims(y_dims, x_dims_trimed, axis_trim, &pre, &n, &post, + &is_run_common_broadcast); + } + // special case for common implementation. + // case 1: x=[2,3,1,5], y=[2,1,4,1] + // case 2: x=[2,3,4], y=[1,1,4] + CommonElementwiseBroadcastForward( + x, y, z, x_dims, y_dims, func, axis, is_xsize_larger); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/elementwise_functor.h b/libs/ultrainfer/ultrainfer/function/elementwise_functor.h new file mode 100755 index 0000000000..9058b28d2b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/elementwise_functor.h @@ -0,0 +1,131 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/function/elementwise.h" +#include "ultrainfer/function/elementwise_base.h" +#include + +namespace ultrainfer { +namespace function { + +template struct SameDimsElementwiseCompute { + void operator()(const FDTensor &x, const FDTensor &y, FDTensor *z) { + z->Allocate(x.Shape(), x.Dtype()); + Functor()(x, y, z); + } +}; + +template struct SameDimsAddFunctor { + void operator()(const FDTensor &x, const FDTensor &y, FDTensor *z) { + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x + eigen_y; + } +}; + +template struct SameDimsSubtractFunctor { + void operator()(const FDTensor &x, const FDTensor &y, FDTensor *z) { + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x - eigen_y; + } +}; + +template struct SameDimsMultiplyFunctor { + void operator()(const FDTensor &x, const FDTensor &y, FDTensor *z) { + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x * eigen_y; + } +}; + +template struct SameDimsDivideFunctor { + void operator()(const FDTensor &x, const FDTensor &y, FDTensor *z) { + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x / eigen_y; + } +}; + +// Add +template struct AddFunctor { + inline T operator()(const T a, const T b) const { return a + b; } +}; +template struct InverseAddFunctor { + inline T operator()(const T a, const 
T b) const { return b + a; } +}; + +// Subtract +template struct SubtractFunctor { + inline T operator()(const T a, const T b) const { return a - b; } +}; +template struct InverseSubtractFunctor { + inline T operator()(const T a, const T b) const { return b - a; } +}; + +// Multiply +template struct MultiplyFunctor { + inline T operator()(const T a, const T b) const { return a * b; } +}; +template <> struct MultiplyFunctor { + inline bool operator()(const bool a, const bool b) const { return a && b; } +}; +template struct InverseMultiplyFunctor { + inline T operator()(const T a, const T b) const { return b * a; } +}; +template <> struct InverseMultiplyFunctor { + inline bool operator()(const bool a, const bool b) const { return b && a; } +}; + +// Divide +#define DIV_ERROR_INFO \ + "InvalidArgumentError: Integer division by zero encountered in " \ + "(floor) divide. Please check the input value." + +template struct DivideFunctor { + inline T operator()(const T a, const T b) const { return a / b; } +}; + +template +struct DivideFunctor< + T, typename std::enable_if::value>::type> { + inline T operator()(const T a, const T b) const { + // For int32/int64, need to check whether the divison is zero. + FDASSERT(b != 0, DIV_ERROR_INFO); + return a / b; + } +}; + +template struct InverseDivideFunctor { + inline T operator()(const T a, const T b) const { return b / a; } +}; + +// Maximum +template struct MaximumFunctor { + inline T operator()(const T a, const T b) const { return a > b ? a : b; } +}; + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/full.cc b/libs/ultrainfer/ultrainfer/function/full.cc new file mode 100755 index 0000000000..65d0860612 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/full.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/full.h" +#include "ultrainfer/function/eigen.h" +#include + +namespace ultrainfer { +namespace function { + +template void FullValue(FDTensor *tensor, const Scalar &val) { + auto t = EigenVector::Flatten(*tensor); + auto &place = *EigenDeviceWrapper::GetInstance()->GetDevice(); + t.device(place) = t.constant(val.to()); +} + +void Full(const Scalar &value, const std::vector &shape, FDTensor *out, + FDDataType dtype) { + FD_VISIT_ALL_TYPES(dtype, "Full", ([&] { + out->Allocate(shape, dtype); + FullValue(out, value); + })); +} + +void FullLike(const FDTensor &x, const Scalar &value, FDTensor *out, + FDDataType dtype) { + Full(value, x.Shape(), out, dtype); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/full.h b/libs/ultrainfer/ultrainfer/function/full.h new file mode 100755 index 0000000000..42deb1822f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/full.h @@ -0,0 +1,44 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_scalar.h" +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Fill the value to tensor + @param value The value to be filled in tensor + @param shape The shape of output tensor. + @param out The output tensor which stores the result. + @param dtype The data type of output tensor. Default to float32 +*/ +ULTRAINFER_DECL void Full(const Scalar &value, + const std::vector &shape, FDTensor *out, + FDDataType dtype = FDDataType::FP32); + +/** Fill the value to tensor + @param x The input tensor. + @param value The value to be filled in tensor + @param out The output tensor which stores the result. + @param dtype The data type of output tensor. Default to float32 +*/ +ULTRAINFER_DECL void FullLike(const FDTensor &x, const Scalar &value, + FDTensor *out, + FDDataType dtype = FDDataType::FP32); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/functions.h b/libs/ultrainfer/ultrainfer/function/functions.h new file mode 100755 index 0000000000..3efa03e0c0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/functions.h @@ -0,0 +1,36 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
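+// A minimal usage sketch for the Full/FullLike helpers declared in full.h
+// above, combined with the element-wise tensor operators from elementwise.h.
+// The shapes and values below are illustrative assumptions, not part of this
+// patch:
+//
+//   #include "ultrainfer/function/functions.h"
+//   using namespace ultrainfer;
+//   FDTensor ones, twos;
+//   function::Full(Scalar(1.0f), {2, 3}, &ones);    // 2x3 tensor filled with 1.0f
+//   function::FullLike(ones, Scalar(2.0f), &twos);  // same shape, filled with 2.0f
+//   FDTensor sum = ones + twos;                     // element-wise add -> all 3.0f
+//   FDTensor scaled = sum * 0.5f;                   // tensor-scalar overload -> all 1.5f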
+ +#pragma once + +#include "ultrainfer/function/cast.h" +#include "ultrainfer/function/clip.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/function/cumprod.h" +#include "ultrainfer/function/elementwise.h" +#include "ultrainfer/function/full.h" +#include "ultrainfer/function/gather_scatter_along_axis.h" +#include "ultrainfer/function/gaussian_random.h" +#include "ultrainfer/function/isfinite.h" +#include "ultrainfer/function/linspace.h" +#include "ultrainfer/function/math.h" +#include "ultrainfer/function/pad.h" +#include "ultrainfer/function/quantile.h" +#include "ultrainfer/function/reduce.h" +#include "ultrainfer/function/slice.h" +#include "ultrainfer/function/softmax.h" +#include "ultrainfer/function/sort.h" +#include "ultrainfer/function/split.h" +#include "ultrainfer/function/tile.h" +#include "ultrainfer/function/transpose.h" diff --git a/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.cc b/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.cc new file mode 100755 index 0000000000..4cbb64662e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/gather_scatter_along_axis.h" +#include "ultrainfer/function/tile.h" + +namespace ultrainfer { +namespace function { + +class TensorAssign { +public: + template + void operator()(tensor_t *self_data, tensor_t *src_data) const { + *self_data = *src_data; + } +}; +static TensorAssign tensor_assign; + +template +struct GatherScatterFunctor { + template + void operator()(const FDTensor &x, int axis, const FDTensor &index, + FDTensor *result, const func_t &reduce_op) { + if (index.Numel() == 0) { + return; + } + result->Allocate(index.Shape(), x.Dtype()); + const T *x_data = reinterpret_cast(x.Data()); + const index_t *index_data = reinterpret_cast(index.Data()); + T *result_data = reinterpret_cast(result->Data()); + + int64_t x_size = x.Numel(); + int64_t index_size = index.Numel(); + int64_t result_size = result->Numel(); + auto x_dims = x.Shape(); + auto index_dims = index.Shape(); + auto result_dims = result->Shape(); + if (x_size == 0 || result_size == 0 || index_size == 0) { + FDASSERT(false, "zero size input found, self_size, result_size, " + "index_size cannot be 0"); + return; + } + int select_dim_size = index_dims[axis]; + // index matrix has different shape with self matrix or src matrix. + int replaced_select_dim_size = + is_scatter_like ? 
result_dims[axis] : x_dims[axis]; + int64_t inner_dim_size = 1; + int64_t outer_dim_size = 1; + for (int64_t i = 0; i < axis; ++i) { + inner_dim_size *= index_dims[i]; + } + + for (int i = axis + 1; i < index_dims.size(); i++) { + outer_dim_size *= index_dims[i]; + } + int64_t index_idx = 0; + int64_t self_idx, src_idx; + // N layer loop squeezed into 3 layers loop + for (int64_t i = 0; i < inner_dim_size; i++) { + for (int64_t j = 0; j < select_dim_size; j++) { + for (int64_t k = 0; k < outer_dim_size; k++) { + int64_t index = index_data[index_idx]; + // This index might out of bound of index matrix's index, so here + // multiply the replaced_select_dim_size. + int64_t replace_index = k + index * outer_dim_size + + i * outer_dim_size * replaced_select_dim_size; + + self_idx = is_scatter_like ? replace_index : index_idx; + src_idx = is_scatter_like ? index_idx : replace_index; + + reduce_op((T *)(result_data + self_idx), // NOLINT + (T *)(x_data + src_idx)); // NOLINT + + index_idx++; + } + } + } + } +}; + +template struct GatherFunctor { + void operator()(const FDTensor &x, int axis, const FDTensor &index, + FDTensor *result) { + FD_VISIT_INT_TYPES(index.Dtype(), "GatherFunctor", [&]() { + auto x_shape = x.Shape(); + auto index_shape = index.Shape(); + std::vector repeat_times(x_shape.size(), 1); + for (int i = 0; i < x_shape.size(); ++i) { + repeat_times[i] = x_shape[i] / index_shape[i]; + } + repeat_times[axis] = 1; + FDTensor gs_index; + Tile(index, repeat_times, &gs_index); + GatherScatterFunctor()( + x, axis, gs_index, result, tensor_assign); + }); + } +}; + +void GatherAlongAxis(const FDTensor &x, const FDTensor &index, FDTensor *result, + int axis) { + int rank = x.Shape().size(); + FDASSERT(axis >= -rank && axis < rank, + "axis should be in range [-%d, %d - 1].", rank, rank - 1); + if (axis < 0) { + axis += rank; + } + FD_VISIT_ALL_TYPES(x.Dtype(), "GatherAlongAxis", [&]() { + GatherFunctor()(x, axis, index, result); + }); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.h b/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.h new file mode 100755 index 0000000000..4ff44cc501 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/gather_scatter_along_axis.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Output is obtained by gathering entries of axis of x indexed by index and + * concatenate them together. + @param x The input tensor. + @param index The index of a tensor to gather. + @param out The output tensor which stores the result. + @param axis Axis which will be gathered. 
+*/
+ULTRAINFER_DECL void GatherAlongAxis(const FDTensor &x, const FDTensor &index,
+                                     FDTensor *result, int axis);
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/gaussian_random.cc b/libs/ultrainfer/ultrainfer/function/gaussian_random.cc
new file mode 100755
index 0000000000..c0a01de1d1
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/gaussian_random.cc
@@ -0,0 +1,46 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/function/gaussian_random.h"
+#include <random>
+#include
+#include
+
+namespace ultrainfer {
+namespace function {
+
+template <typename T>
+void GaussianRandomKernel(const std::vector<int64_t> &shape, float mean,
+                          float std, int seed, FDTensor *out) {
+  std::normal_distribution<T> dist(mean, std);
+
+  out->Allocate(shape, TypeToDataType<T>::dtype);
+  int64_t size = out->Numel();
+  T *data = reinterpret_cast<T *>(out->Data());
+  std::mt19937_64 engine;
+  engine.seed(seed);
+  for (int64_t i = 0; i < size; ++i) {
+    data[i] = dist(engine);
+  }
+}
+
+void GaussianRandom(const std::vector<int64_t> &shape, FDTensor *out,
+                    FDDataType dtype, float mean, float std, int seed) {
+  FD_VISIT_FLOAT_TYPES(dtype, "GaussianRandomKernel", [&]() {
+    GaussianRandomKernel<data_t>(shape, mean, std, seed, out);
+  });
+}
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/gaussian_random.h b/libs/ultrainfer/ultrainfer/function/gaussian_random.h
new file mode 100755
index 0000000000..53f2711a7a
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/gaussian_random.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/core/fd_tensor.h"
+
+namespace ultrainfer {
+namespace function {
+
+/** Fill the output tensor with random values drawn from a Gaussian (normal)
+ * distribution with the given mean and standard deviation.
+ @param shape The output tensor shape.
+ @param out The output tensor which stores the result.
+ @param mean The mean of the gaussian distribution.
+ @param std The standard deviation of the gaussian distribution.
+ @param seed The seed of the random generator.
+ @param dtype The data type of the output Tensor.
+*/ +void GaussianRandom(const std::vector &shape, FDTensor *out, + FDDataType dtype = FDDataType::FP32, float mean = 0.0f, + float std = 1.0f, int seed = 0); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/isfinite.cc b/libs/ultrainfer/ultrainfer/function/isfinite.cc new file mode 100755 index 0000000000..b46b5b0a97 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/isfinite.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/isfinite.h" +#include "ultrainfer/core/float16.h" +#include +#include + +namespace ultrainfer { +namespace function { + +template struct IsNanFunctor { + OutT operator()(const T &a) const { return static_cast(std::isnan(a)); } +}; + +template +struct IsNanFunctor::value>::type> { + OutT operator()(const T &a) const { return static_cast(false); } +}; + +template struct IsNanFunctor { + OutT operator()(const ultrainfer::float16 &a) const { + return static_cast(ultrainfer::isnan(a)); + } +}; + +template struct IsInfFunctor { + OutT operator()(const T &a) const { return static_cast(std::isinf(a)); } +}; + +template +struct IsInfFunctor::value>::type> { + OutT operator()(const T &a) const { return static_cast(false); } +}; + +template struct IsInfFunctor { + OutT operator()(const ultrainfer::float16 &a) const { + return static_cast(ultrainfer::isinf(a)); + } +}; + +template +struct IsFiniteFunctor { + OutT operator()(const T &a) const { + return static_cast(std::isfinite(a)); + } +}; + +template +struct IsFiniteFunctor< + T, OutT, typename std::enable_if::value>::type> { + OutT operator()(const T &a) const { return static_cast(true); } +}; + +template +struct IsFiniteFunctor { + OutT operator()(const ultrainfer::float16 &a) const { + return static_cast(ultrainfer::isfinite(a)); + } +}; + +#define DEFINE_ISFINITE_KERNEL(isfinite_kernel, functor) \ + template \ + void isfinite_kernel(const FDTensor &x, FDTensor *out, FDDataType dtype) { \ + FD_VISIT_ALL_TYPES(dtype, #isfinite_kernel, ([&] { \ + out->Allocate(x.Shape(), dtype); \ + functor unary_func; \ + data_t *out_ptr = \ + reinterpret_cast(out->Data()); \ + const T *input_ptr = \ + reinterpret_cast(x.Data()); \ + std::transform(input_ptr, input_ptr + x.Numel(), \ + out_ptr, unary_func); \ + })); \ + } + +DEFINE_ISFINITE_KERNEL(IsNanKernel, IsNanFunctor) +DEFINE_ISFINITE_KERNEL(IsInfKernel, IsInfFunctor) +DEFINE_ISFINITE_KERNEL(IsFiniteKernel, IsFiniteFunctor) +#undef DEFINE_ISFINITE_KERNEL + +void IsNan(const FDTensor &x, FDTensor *out, FDDataType dtype) { + FD_VISIT_FLOAT_TYPES(x.dtype, "IsNanKernel", + ([&] { IsNanKernel(x, out, dtype); })); +} + +void IsInf(const FDTensor &x, FDTensor *out, FDDataType dtype) { + FD_VISIT_FLOAT_TYPES(x.dtype, "IsInfKernel", + ([&] { IsInfKernel(x, out, dtype); })); +} + +void IsFinite(const FDTensor &x, FDTensor *out, FDDataType dtype) { + FD_VISIT_FLOAT_TYPES(x.dtype, "IsFiniteKernel", + ([&] { 
IsFiniteKernel(x, out, dtype); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/isfinite.h b/libs/ultrainfer/ultrainfer/function/isfinite.h new file mode 100755 index 0000000000..466cd60f9c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/isfinite.h @@ -0,0 +1,47 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Return whether every element of input tensor is NaN or not. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dtype The output data type +*/ +ULTRAINFER_DECL void IsNan(const FDTensor &x, FDTensor *out, + FDDataType dtype = FDDataType::BOOL); + +/** Return whether every element of input tensor is Inf or not. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dtype The output data type +*/ +ULTRAINFER_DECL void IsInf(const FDTensor &x, FDTensor *out, + FDDataType dtype = FDDataType::BOOL); + +/** Return whether every element of input tensor is finite or not. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dtype The output data type +*/ +ULTRAINFER_DECL void IsFinite(const FDTensor &x, FDTensor *out, + FDDataType dtype = FDDataType::BOOL); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/linspace.cc b/libs/ultrainfer/ultrainfer/function/linspace.cc new file mode 100755 index 0000000000..030c525e41 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/linspace.cc @@ -0,0 +1,52 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
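+// A small usage sketch for the IsNan/IsInf/IsFinite helpers declared in
+// isfinite.h above. It assumes FDTensor::SetExternalData wraps a host buffer
+// (as in the FDTensor API); the input values are illustrative only:
+//
+//   #include <cmath>
+//   #include "ultrainfer/function/isfinite.h"
+//   using namespace ultrainfer;
+//   std::vector<float> vals = {1.0f, NAN, INFINITY};
+//   FDTensor x, nan_mask, finite_mask;
+//   x.SetExternalData({3}, FDDataType::FP32, vals.data());
+//   function::IsNan(x, &nan_mask);       // BOOL tensor: {false, true, false}
+//   function::IsFinite(x, &finite_mask); // BOOL tensor: {true, false, false}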
+ +#include "ultrainfer/function/linspace.h" + +namespace ultrainfer { +namespace function { + +template +void LinspaceKernel(double start, double end, int num, FDTensor *out) { + FDASSERT( + num > 0, + "The num of linspace op should be larger than 0, but received num is %d", + num); + out->Allocate({num}, TypeToDataType::dtype); + T *out_data = reinterpret_cast(out->Data()); + if (num > 1) { + // step should be of double type for all types + double step = (static_cast(end - start)) / (num - 1); + int half_num = num / 2; + for (int i = 0; i < num; ++i) { + if (i < half_num) { + out_data[i] = static_cast(start + step * i); + } else { + out_data[i] = static_cast(end - step * (num - i - 1)); + } + } + } else { + out_data[0] = static_cast(start); + } +} + +void Linspace(double start, double end, int num, FDTensor *out, + FDDataType dtype) { + FD_VISIT_INT_FLOAT_TYPES(dtype, "LinspaceKernel", ([&] { + LinspaceKernel(start, end, num, out); + })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/linspace.h b/libs/ultrainfer/ultrainfer/function/linspace.h new file mode 100755 index 0000000000..94e7d330d2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/linspace.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Return fixed number of evenly spaced values within a given interval. + @param start The input start is start variable of range. + @param end The input stop is start variable of range. + @param num The input num is given num of the sequence. + @param out The output tensor which stores the result. + @param dtype The data type of output tensor, default to float32. +*/ +ULTRAINFER_DECL void Linspace(double start, double end, int num, FDTensor *out, + FDDataType dtype = FDDataType::FP32); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/math.cc b/libs/ultrainfer/ultrainfer/function/math.cc new file mode 100755 index 0000000000..c9290ba5ce --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/math.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/function/math.h" +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/function/math_functor.h" + +namespace ultrainfer { +namespace function { + +#define DEFINE_ACTIVATION_KERNEL(name, functor_class) \ + template void name##Kernel(const FDTensor &x, FDTensor *out) { \ + functor_class functor; \ + ActivationImpl>(x, out, functor); \ + } + +template +void ActivationImpl(const FDTensor &X, FDTensor *Out, const Functor &functor) { + FDASSERT(Out != nullptr, "Output Out should not be nullptr"); + FDTensor out_tmp; + auto x = EigenVector::Flatten(X); + out_tmp.Allocate(X.Shape(), X.Dtype()); + auto out = EigenVector::Flatten(out_tmp); + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + functor(dev, x, out); + *Out = std::move(out_tmp); +} + +DEFINE_ACTIVATION_KERNEL(Sqrt, SqrtFunctor) +DEFINE_ACTIVATION_KERNEL(Log, LogFunctor) +DEFINE_ACTIVATION_KERNEL(Round, RoundFunctor) +DEFINE_ACTIVATION_KERNEL(Exp, ExpFunctor) +DEFINE_ACTIVATION_KERNEL(Abs, AbsFunctor) +DEFINE_ACTIVATION_KERNEL(Ceil, CeilFunctor) +DEFINE_ACTIVATION_KERNEL(Floor, FloorFunctor) + +void Sqrt(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "SqrtKernel", + ([&] { SqrtKernel(x, out); })); +} + +void Log(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "LogKernel", + ([&] { LogKernel(x, out); })); +} + +void Round(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "RoundKernel", + ([&] { RoundKernel(x, out); })); +} + +void Exp(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "ExpKernel", + ([&] { ExpKernel(x, out); })); +} + +void Abs(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "AbsKernel", + ([&] { AbsKernel(x, out); })); +} + +void Ceil(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "CeilKernel", + ([&] { CeilKernel(x, out); })); +} + +void Floor(const FDTensor &x, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "FloorKernel", + ([&] { FloorKernel(x, out); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/math.h b/libs/ultrainfer/ultrainfer/function/math.h new file mode 100755 index 0000000000..fe53d3487b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/math.h @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Calculates the sqrt of the given input Tensor, element-wise. Only for float + type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Sqrt(const FDTensor &x, FDTensor *out); + +/** Calculates the natural log of the given input Tensor, element-wise. Only for + float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. 
+*/ +ULTRAINFER_DECL void Log(const FDTensor &x, FDTensor *out); + +/** Rounds the values in the input to the nearest integer value, element-wise. + Only for float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Round(const FDTensor &x, FDTensor *out); + +/** Computes exp of x element-wise with a natural number e as the base, + element-wise. Only for float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Exp(const FDTensor &x, FDTensor *out); + +/** This operator is used to perform elementwise abs for input X. Only for float + type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Abs(const FDTensor &x, FDTensor *out); + +/** Computes ceil of x element-wise. Only for float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Ceil(const FDTensor &x, FDTensor *out); + +/** Computes floor of x element-wise. Only for float type FDTensor + @param x The input tensor. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Floor(const FDTensor &x, FDTensor *out); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/math_functor.h b/libs/ultrainfer/ultrainfer/function/math_functor.h new file mode 100755 index 0000000000..083007d012 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/math_functor.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/function/eigen.h" + +namespace ultrainfer { +namespace function { + +// log(x) = natural logarithm of x +template struct LogFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.log(); + } +}; + +// exp functor +// exp(x) = e^x +template struct ExpFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.exp(); + } +}; + +// round(x) = [x] +template struct RoundFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.round(); + } +}; + +// sqrt(x) = x^(1/2) +template struct SqrtFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.sqrt(); + } +}; + +// abs(x) = x if x > 0 else -x +template struct AbsFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = + x.unaryExpr([](T v) { return v > static_cast(0) ? 
v : -v; }); + } +}; + +// ceil(x) = ceiling(x) +template struct CeilFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.ceil(); + } +}; + +// floor(x) = flooring(x) +template struct FloorFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.floor(); + } +}; + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/pad.cc b/libs/ultrainfer/ultrainfer/function/pad.cc new file mode 100755 index 0000000000..86bf452c28 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/pad.cc @@ -0,0 +1,119 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/pad.h" + +#include + +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace function { +template struct PadEigen { + using Array = std::array, Rank>; + using Array32Bit = std::array, Rank>; + using InType = Eigen::TensorMap< + Eigen::Tensor>; + using InType32BitIndex = + Eigen::TensorMap, + Eigen::Aligned>; + using OutType = Eigen::TensorMap< + Eigen::Tensor>; + using OutType32BitIndex = + Eigen::TensorMap, + Eigen::Aligned>; + + static void Eval(const Eigen::DefaultDevice &dev, OutType out, + const InType &in, const Array &padding, const T value) { + out.device(dev) = in.pad(padding, value); + } + + static void Eval32(const Eigen::DefaultDevice &dev, OutType32BitIndex out, + const InType32BitIndex &in, const Array32Bit &padding, + const T value) { + out.device(dev) = in.pad(padding, value); + } +}; + +template +void PadFunction(const std::vector &pads, const FDTensor &src, T pad_value, + FDTensor *out) { + std::array, D> paddings; + + for (size_t i = 0; i < paddings.size(); ++i) { + paddings[i].first = pads[i * 2]; + paddings[i].second = pads[i * 2 + 1]; + } + + auto src_tensor = EigenTensor::From(src); + auto out_tensor = EigenTensor::From(*out); + + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + PadEigen::Eval(dev, out_tensor, src_tensor, paddings, pad_value); +} + +template +void PaddingFunctor(int rank, const std::vector &pads, T pad_value, + const FDTensor &src, FDTensor *out) { + switch (rank) { + case 1: + PadFunction(pads, src, pad_value, out); + break; + case 2: + PadFunction(pads, src, pad_value, out); + break; + case 3: + PadFunction(pads, src, pad_value, out); + break; + case 4: + PadFunction(pads, src, pad_value, out); + break; + case 5: + PadFunction(pads, src, pad_value, out); + break; + case 6: + PadFunction(pads, src, pad_value, out); + break; + default: + FDASSERT( + false, + "Pad only support tensors with no more than 6 dimensions currently."); + } +} + +template +void PadKernel(const FDTensor &x, const std::vector &paddings, + const T &pad_value, FDTensor *out) { + std::vector new_shape(x.shape.size()); + for (size_t i = 0; i < x.shape.size(); ++i) { + new_shape[i] = x.shape[i] + paddings[2 * i] + paddings[2 * i + 1]; + } + 
out->Allocate(new_shape, x.dtype);
+  PaddingFunctor<T>(x.shape.size(), paddings, pad_value, x, out);
+}
+
+void Pad(const FDTensor &x, FDTensor *out, const std::vector<int> &pads,
+         float value) {
+  FDASSERT(pads.size() == x.shape.size() * 2,
+           "Size of pads:%zu must be 2 times of rank:%zu.", pads.size(),
+           x.shape.size());
+  FDTensor out_tmp;
+  FD_VISIT_ALL_TYPES(x.dtype, "PadKernel",
+                     ([&] { PadKernel<data_t>(x, pads, value, &out_tmp); }));
+  *out = std::move(out_tmp);
+}
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/pad.h b/libs/ultrainfer/ultrainfer/function/pad.h
new file mode 100755
index 0000000000..2b94c5587a
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/pad.h
@@ -0,0 +1,32 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/core/fd_tensor.h"
+
+namespace ultrainfer {
+namespace function {
+/** Execute the pad operation on the input FDTensor along the given dims.
+ @param x The input tensor.
+ @param out The output tensor which stores the result.
+ @param pads The size of padding for each dimension; for a 3-D tensor the pads
+ should be [1d-left, 1d-right, 2d-left, 2d-right, 3d-left, 3d-right]
+ @param pad_value The value used to fill the padded elements of the output
+ tensor.
+*/
+ULTRAINFER_DECL void Pad(const FDTensor &x, FDTensor *out,
+                         const std::vector<int> &pads, float pad_value = 0);
+
+} // namespace function
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/function/quantile.cc b/libs/ultrainfer/ultrainfer/function/quantile.cc
new file mode 100755
index 0000000000..54dfc15544
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/function/quantile.cc
@@ -0,0 +1,130 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
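+// A short usage sketch for Pad declared in pad.h above; the input shape and
+// padding sizes are illustrative assumptions only:
+//
+//   #include "ultrainfer/function/functions.h"
+//   using namespace ultrainfer;
+//   FDTensor x, padded;
+//   function::Full(Scalar(1.0f), {2, 2}, &x);
+//   // pads = {dim0-before, dim0-after, dim1-before, dim1-after} for a 2-D tensor
+//   function::Pad(x, &padded, {1, 1, 2, 2}, 0.0f);
+//   // padded has shape {4, 6}: the 2x2 block of ones surrounded by zeros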
+ +#include "ultrainfer/function/quantile.h" +#include "ultrainfer/core/fd_scalar.h" +#include "ultrainfer/function/cast.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/function/elementwise.h" +#include "ultrainfer/function/gather_scatter_along_axis.h" +#include "ultrainfer/function/isfinite.h" +#include "ultrainfer/function/math.h" +#include "ultrainfer/function/reduce.h" +#include "ultrainfer/function/sort.h" +#include "ultrainfer/function/transpose.h" +#include +#include +#include + +namespace ultrainfer { +namespace function { + +template +void QuantileKernel(const FDTensor &x, const std::vector &q, + const std::vector &axis, FDTensor *out) { + FDASSERT(q.size() > 0, "q should not be empty."); + FDASSERT(axis.size() > 0, "axis should not be empty."); + std::vector axis_src; + std::vector out_shape = x.Shape(); + int64_t rank = x.Shape().size(); + for (auto axis_single : axis) { + FDASSERT(axis_single >= -rank && axis_single < rank, + "The axis is expected to be in range of [%d, %d), but got %d", + -rank, rank, axis_single); + if (axis_single < 0) { + axis_single += rank; + } + axis_src.push_back(axis_single); + out_shape[axis_single] = 1; + } + std::vector axis_dst; + for (int64_t i = 0; i < rank; ++i) { + if (std::find(axis_src.begin(), axis_src.end(), i) == axis_src.end()) { + axis_dst.push_back(i); + } + } + axis_dst.insert(axis_dst.end(), axis_src.begin(), axis_src.end()); + FDTensor y; + Transpose(x, &y, axis_dst); + std::vector y_shape(rank - axis_src.size(), 0); + y_shape.push_back(-1); + y.Reshape({y_shape}); + + int64_t target_axis = rank - 1; + FDTensor mask, valid_counts, mask_any; + IsNan(y, &mask); + Any(mask, &mask_any, {target_axis}, true); + bool *mask_data = reinterpret_cast(mask.Data()); + std::transform(mask_data, mask_data + mask.Numel(), mask_data, + [](const bool &val) { return !val; }); + Cast(mask_any, &mask_any, FDDataType::FP64); + Cast(mask, &mask, FDDataType::FP64); + Sum(mask, &valid_counts, {target_axis}, true); + + FDTensor one_tensor(Scalar(static_cast(1.0))); + + std::vector indices; + FDTensor last_index(Scalar(static_cast(x.Shape()[target_axis]))); + for (auto q_num : q) { + FDASSERT(q_num >= 0 && q_num <= 1, "q should be in range [0, 1]"); + FDTensor q_tensor(static_cast(q_num)); + FDTensor index = q_tensor * (valid_counts - one_tensor); + index = mask_any * last_index + (one_tensor - mask_any) * index; + indices.push_back(index); + } + + std::vector outputs; + FDTensor sorted_tensor, sorted_indices_tensor; + Sort(y, &sorted_tensor, &sorted_indices_tensor, target_axis); + Cast(sorted_tensor, &sorted_tensor, FDDataType::FP64); + + FDTensor indices_below, indices_upper; + for (auto &&index : indices) { + Floor(index, &indices_below); + Ceil(index, &indices_upper); + Cast(indices_below, &indices_below, FDDataType::INT32); + Cast(indices_upper, &indices_upper, FDDataType::INT32); + FDTensor tensor_below, tensor_upper; + GatherAlongAxis(sorted_tensor, indices_below, &tensor_below, target_axis); + GatherAlongAxis(sorted_tensor, indices_upper, &tensor_upper, target_axis); + // Need to cast to FP64 to compute with index and tensor_upper + Cast(indices_below, &indices_below, FDDataType::FP64); + + FDTensor weight = index - indices_below; + FDTensor out = tensor_below + weight * (tensor_upper - tensor_below); + out.Squeeze(target_axis); + if (out.Dtype() != x.Dtype()) { + Cast(out, &out, x.Dtype()); + } + outputs.push_back(std::move(out)); + } + if (outputs.size() > 1) { + // Execute stack operation + for (auto &output : outputs) { + 
output.ExpandDim(0); + } + Concat(outputs, out, 0); + } else { + *out = std::move(outputs[0]); + } +} + +void Quantile(const FDTensor &x, const std::vector &q, + const std::vector &axis, FDTensor *out) { + FD_VISIT_FLOAT_TYPES(x.dtype, "QuantileKernel", + ([&] { QuantileKernel(x, q, axis, out); })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/quantile.h b/libs/ultrainfer/ultrainfer/function/quantile.h new file mode 100755 index 0000000000..a678032555 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/quantile.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Compute the quantile of the input along the specified axis. If any values + ** in a reduced row are NaN, then the quantiles for that reduction will be NaN. + @param x The input tensor. + @param q The q for calculate quantile, which should be in range [0, 1]. + @param axis The axis along which to calculate quantile. axis should be int + or list of int. + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Quantile(const FDTensor &x, const std::vector &q, + const std::vector &axis, FDTensor *out); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/reduce.cc b/libs/ultrainfer/ultrainfer/function/reduce.cc new file mode 100755 index 0000000000..25247f86dd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/reduce.cc @@ -0,0 +1,414 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
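+// A minimal usage sketch for Quantile declared in quantile.h above, using
+// Linspace to build the input; the values and q are illustrative assumptions:
+//
+//   #include "ultrainfer/function/functions.h"
+//   using namespace ultrainfer;
+//   FDTensor x, median;
+//   function::Linspace(1.0, 4.0, 4, &x);        // {1, 2, 3, 4}
+//   function::Quantile(x, {0.5}, {0}, &median); // q = 0.5 along axis 0 -> 2.5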
+ +#include "ultrainfer/function/reduce.h" + +#include +#include + +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/function/reduce_functor.h" +#include "ultrainfer/function/transpose.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace function { +template +void ReduceFunctor(const FDTensor &input, FDTensor *output, + const std::vector &dims, bool keep_dim) { + auto x = EigenTensor::From(input); + auto x_rank = static_cast(x.dimensions().size()); + auto reduce_dim = Eigen::array(); + std::vector dims_ref = dims; + + auto out_dims = input.shape; + for (size_t i = 0; i < dims_ref.size(); ++i) { + if (dims_ref[i] < 0) + dims_ref[i] = x_rank + dims_ref[i]; + reduce_dim[i] = dims_ref[i]; + out_dims[dims_ref[i]] = 1; + } + auto origin_output_dims = out_dims; + output->Allocate(origin_output_dims, TypeToDataType::dtype); + // construct the squeezed output tensor + if (x_rank > 1) { + const int kDelFlag = -2; + for (size_t i = 0; i < dims_ref.size(); ++i) { + out_dims[dims_ref[i]] = kDelFlag; + } + out_dims.erase(remove(out_dims.begin(), out_dims.end(), kDelFlag), + out_dims.end()); + } + + auto &place = *EigenDeviceWrapper::GetInstance()->GetDevice(); + Functor functor; + if (D == 1) { + auto out = EigenScalar::From(*output); + functor(place, &x, &out, reduce_dim); + } else { + auto out = EigenTensor::From(*output, out_dims); + functor(place, &x, &out, reduce_dim); + if (!keep_dim) { + output->shape = std::move(out_dims); + } + } +} + +#define HANDLE_REDUCE_DIM(NDIM, RDIM) \ + if (ndim == NDIM && rdim == RDIM) { \ + ReduceFunctor(input, output, dims, keep_dim); \ + } + +inline void GetShuffledDim(const std::vector &src_dims, + std::vector *dst_dims, + const std::vector &reduced_dims, + std::vector *perm_axis) { + // check if it's a reduced dim + std::vector src_dims_check(src_dims.size(), false); + size_t src_size = src_dims.size(); + size_t reduce_size = reduced_dims.size(); + std::vector regular_reduced_dims = reduced_dims; + for (size_t i = 0; i < regular_reduced_dims.size(); i++) { + if (regular_reduced_dims[i] < 0) { + regular_reduced_dims[i] = src_size + regular_reduced_dims[i]; + } + } + + for (size_t i = 0; i < reduce_size; ++i) { + dst_dims->at(src_size - reduce_size + i) = + src_dims[regular_reduced_dims[i]]; + (*perm_axis)[src_size - reduce_size + i] = regular_reduced_dims[i]; + src_dims_check[regular_reduced_dims[i]] = true; + } + + size_t offset = 0; + for (size_t i = 0; i < src_dims_check.size(); ++i) { + bool is_reduced = src_dims_check[i]; + if (!is_reduced) { + (*perm_axis)[offset] = i; + dst_dims->at(offset++) = src_dims[i]; + } + } +} + +template +void GetShuffledInput(const FDTensor &input, FDTensor *shuffled_input, + const std::vector &dims) { + auto shuffled_dims = input.shape; + std::vector perm_axis(input.shape.size()); + GetShuffledDim(input.shape, &shuffled_dims, dims, &perm_axis); + + shuffled_input->Allocate(shuffled_dims, input.dtype); + Transpose(input, shuffled_input, perm_axis); +} + +//////////////// HandleLargeDim +template +void HandleLargeDim(const FDTensor &input, FDTensor *output, + const std::vector &dims, bool keep_dim) { + auto out_dims = input.shape; + std::vector dims_ref = dims; + auto x_rank = input.shape.size(); + for (size_t i = 0; i < dims_ref.size(); ++i) { + if (dims_ref[i] < 0) + dims_ref[i] = x_rank + dims_ref[i]; + out_dims[dims_ref[i]] = 1; + } + if (!keep_dim) { + const int kDelFlag = -2; + for (size_t i = 0; i < dims_ref.size(); ++i) { + out_dims[dims_ref[i]] = kDelFlag; + } + 
out_dims.erase(remove(out_dims.begin(), out_dims.end(), kDelFlag), + out_dims.end()); + } + output->Allocate(out_dims, TypeToDataType::dtype); + // shuffle the reduced dim to the end + FDTensor shuffled_input; + GetShuffledInput(input, &shuffled_input, dims); + + // transpose to 2D tensor whose shape is {unreduced, reduced}. + const int64_t unreduced = output->Numel(); + const int64_t reduced = shuffled_input.Numel() / unreduced; + shuffled_input.Allocate({unreduced, reduced}, TypeToDataType::dtype); + + output->shape = {unreduced}; + ReduceFunctor(shuffled_input, output, {1}, keep_dim); + output->shape = out_dims; +} + +////////////// ReduceKernel + +template +void ReduceKernelImpl(const FDTensor &input, FDTensor *output, + const std::vector &dims, bool keep_dim, + bool reduce_all) { + output->Allocate({1}, TypeToDataType::dtype); + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + if (reduce_all) { + // Flatten and reduce 1-D tensor + auto x = EigenVector::Flatten(input); + auto out = EigenScalar::From(*output); + auto reduce_dim = Eigen::array({{0}}); + + Functor functor; + functor(dev, &x, &out, reduce_dim); + } else { + int ndim = input.shape.size(); + int rdim = dims.size(); + if (ndim > 4) { + HandleLargeDim(input, output, dims, keep_dim); + } else { + HANDLE_REDUCE_DIM(4, 3); + HANDLE_REDUCE_DIM(4, 2); + HANDLE_REDUCE_DIM(4, 1); + HANDLE_REDUCE_DIM(3, 2); + HANDLE_REDUCE_DIM(3, 1); + HANDLE_REDUCE_DIM(2, 1); + HANDLE_REDUCE_DIM(1, 1); + } + } +} + +template +void BoolReduceKernel(const FDTensor &input, FDTensor *output, + const std::vector &dims, bool keep_dim, + bool reduce_all) { + // The dims has full dim, set the reduce_all is True + const auto &input_dim_size = input.shape.size(); + std::set dims_set(dims.begin(), dims.end()); + bool full_dim = true; + for (auto i = 0; i < input_dim_size; i++) { + if (dims_set.find(i) == dims_set.end()) { + full_dim = false; + break; + } + } + reduce_all = (reduce_all || full_dim); + + ReduceKernelImpl(input, output, dims, keep_dim, reduce_all); +} + +template +void Reduce(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + // If the dims has full dim, set the reduce_all is True + const int &input_dim_size = x.shape.size(); + std::set dims_set(dims.begin(), dims.end()); + bool full_dim = true; + for (int i = 0; i < input_dim_size; ++i) { + if (dims_set.find(i) == dims_set.end() && + dims_set.find(i - input_dim_size) == dims_set.end()) { + full_dim = false; + break; + } + } + reduce_all = (reduce_all || full_dim); + + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ReduceKernelImpl", ([&] { + ReduceKernelImpl( + x, out, dims, keep_dim, reduce_all); + })); +} + +enum ArgMinMaxType { kArgMin, kArgMax }; + +template +struct ArgMinMaxFunctor {}; + +#define DECLARE_ARG_MIN_MAX_FUNCTOR(eigen_op_type, enum_argminmax_value) \ + template \ + struct ArgMinMaxFunctor { \ + void operator()(const FDTensor &in, FDTensor *out, \ + const std::vector &x_dims, int64_t axis, \ + bool keepdims, bool flatten) { \ + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); \ + auto in_eigen = EigenTensor::From(in, x_dims); \ + if (keepdims) { \ + if (!flatten) { \ + auto out_eigen = EigenTensor::From(*out); \ + out_eigen.device(dev) = \ + in_eigen.eigen_op_type(axis).template cast(); \ + } else { \ + auto out_eigen = EigenScalar::From(*out); \ + out_eigen.device(dev) = \ + in_eigen.eigen_op_type(axis).template cast(); \ + } \ + } else { \ + auto out_eigen = EigenTensor::From(*out); \ + out_eigen.device(dev) 
= \ + in_eigen.eigen_op_type(axis).template cast(); \ + } \ + } \ + } + +DECLARE_ARG_MIN_MAX_FUNCTOR(argmin, ArgMinMaxType::kArgMin); +DECLARE_ARG_MIN_MAX_FUNCTOR(argmax, ArgMinMaxType::kArgMax); + +template +void ArgMinMaxKernel(const FDTensor &x, FDTensor *out, int64_t axis, + bool keepdims, bool flatten) { + bool new_keepdims = keepdims | flatten; + // if flatten, will construct the new dims for the cacluate + std::vector x_dims; + int new_axis = axis; + if (flatten) { + x_dims = {x.Numel()}; + // if flatten, the axis just as 0 + new_axis = 0; + } else { + x_dims = x.shape; + if (axis < 0) + new_axis = axis + x_dims.size(); + } +#define CALL_ARG_MINMAX_FUNCTOR(rank) \ + ArgMinMaxFunctor functor##rank; \ + functor##rank(x, out, x_dims, new_axis, new_keepdims, flatten) + + switch (x_dims.size()) { + case 1: + CALL_ARG_MINMAX_FUNCTOR(1); + break; + case 2: + CALL_ARG_MINMAX_FUNCTOR(2); + break; + case 3: + CALL_ARG_MINMAX_FUNCTOR(3); + break; + case 4: + CALL_ARG_MINMAX_FUNCTOR(4); + break; + case 5: + CALL_ARG_MINMAX_FUNCTOR(5); + break; + case 6: + CALL_ARG_MINMAX_FUNCTOR(6); + break; + default: + FDASSERT(x_dims.size() <= 6, + "%s operator doesn't supports tensors whose ranks are greater " + "than 6.", + (EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax")); + break; +#undef CALL_ARG_MINMAX_FUNCTOR + } +} + +template +void ArgMinMax(const FDTensor &x, FDTensor *out, int64_t axis, + FDDataType output_dtype, bool keepdims, bool flatten) { + const auto &x_dims = x.shape; + int64_t x_rank = x_dims.size(); + FDASSERT(axis >= -x_rank, + "'axis'(%lld) must be greater than or equal to -Rank(X)(%lld).", + axis, -x_rank); + FDASSERT(axis < x_rank, + "'axis'(%lld) must be less than or equal to Rank(X)(%lld).", axis, + x_rank); + FDASSERT( + output_dtype == FDDataType::INT32 || FDDataType::INT64 || + FDDataType::UINT8, + "The attribute of dtype in argmin/argmax must be [%s], [%s] or [%s], but " + "received [%s].", + Str(FDDataType::INT32).c_str(), Str(FDDataType::INT64).c_str(), + Str(FDDataType::UINT8).c_str(), Str(output_dtype).c_str()); + if (axis < 0) + axis += x_rank; + if (output_dtype == FDDataType::INT32) { + int64_t all_element_num = 0; + if (flatten) { + all_element_num = x.Numel(); + + } else { + all_element_num = x_dims[axis]; + } + FDASSERT(all_element_num <= (std::numeric_limits::max)(), + "The element num of the argmin/argmax input at axis is " + "%lld, is larger than int32 maximum value:%d, you must " + "set the dtype of argmin/argmax to 'int64'.", + all_element_num, (std::numeric_limits::max)()); + } + std::vector vec; + if (flatten) { + vec.emplace_back(static_cast(1)); + } else { + for (int64_t i = 0; i < axis; i++) + vec.emplace_back(x_dims[i]); + if (keepdims) { + vec.emplace_back(static_cast(1)); + } + for (int64_t i = axis + 1; i < x_rank; i++) + vec.emplace_back(x_dims[i]); + } + out->Allocate(vec, output_dtype); + + FD_VISIT_INT_TYPES(output_dtype, "ArgMinMaxKernel", ([&] { + ArgMinMaxKernel( + x, out, axis, keepdims, flatten); + })); +} + +void Max(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + Reduce(x, out, dims, keep_dim, reduce_all); +} + +void Min(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + Reduce(x, out, dims, keep_dim, reduce_all); +} + +void Sum(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + Reduce(x, out, dims, keep_dim, reduce_all); +} + +void All(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool 
keep_dim, bool reduce_all) { + BoolReduceKernel(x, out, dims, keep_dim, reduce_all); +} + +void Any(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + BoolReduceKernel(x, out, dims, keep_dim, reduce_all); +} + +void Mean(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + Reduce(x, out, dims, keep_dim, reduce_all); +} + +void Prod(const FDTensor &x, FDTensor *out, const std::vector &dims, + bool keep_dim, bool reduce_all) { + Reduce(x, out, dims, keep_dim, reduce_all); +} + +void ArgMax(const FDTensor &x, FDTensor *out, int64_t axis, + FDDataType output_dtype, bool keep_dim, bool flatten) { + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ArgMaxKernel", ([&] { + ArgMinMax( + x, out, axis, output_dtype, keep_dim, flatten); + })); +} + +void ArgMin(const FDTensor &x, FDTensor *out, int64_t axis, + FDDataType output_dtype, bool keep_dim, bool flatten) { + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ArgMaxKernel", ([&] { + ArgMinMax( + x, out, axis, output_dtype, keep_dim, flatten); + })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/reduce.h b/libs/ultrainfer/ultrainfer/function/reduce.h new file mode 100755 index 0000000000..7b0d2cc30a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/reduce.h @@ -0,0 +1,127 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { +/** Excute the maximum operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dims The vector of axis which will be reduced. + @param keep_dim Whether to keep the reduced dims, default false. + @param reduce_all Whether to reduce all dims, default false. +*/ +ULTRAINFER_DECL void Max(const FDTensor &x, FDTensor *out, + const std::vector &dims, + bool keep_dim = false, bool reduce_all = false); + +/** Excute the minimum operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dims The vector of axis which will be reduced. + @param keep_dim Whether to keep the reduced dims, default false. + @param reduce_all Whether to reduce all dims, default false. +*/ +ULTRAINFER_DECL void Min(const FDTensor &x, FDTensor *out, + const std::vector &dims, + bool keep_dim = false, bool reduce_all = false); + +/** Excute the sum operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dims The vector of axis which will be reduced. + @param keep_dim Whether to keep the reduced dims, default false. + @param reduce_all Whether to reduce all dims, default false. 
+*/
+ULTRAINFER_DECL void Sum(const FDTensor &x, FDTensor *out,
+                         const std::vector &dims,
+                         bool keep_dim = false, bool reduce_all = false);
+
+/** Execute the all operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param dims The vector of axis which will be reduced.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param reduce_all Whether to reduce all dims, default false.
+*/
+ULTRAINFER_DECL void All(const FDTensor &x, FDTensor *out,
+                         const std::vector &dims,
+                         bool keep_dim = false, bool reduce_all = false);
+
+/** Execute the any operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param dims The vector of axis which will be reduced.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param reduce_all Whether to reduce all dims, default false.
+*/
+ULTRAINFER_DECL void Any(const FDTensor &x, FDTensor *out,
+                         const std::vector &dims,
+                         bool keep_dim = false, bool reduce_all = false);
+
+/** Execute the mean operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param dims The vector of axis which will be reduced.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param reduce_all Whether to reduce all dims, default false.
+*/
+ULTRAINFER_DECL void Mean(const FDTensor &x, FDTensor *out,
+                          const std::vector &dims,
+                          bool keep_dim = false, bool reduce_all = false);
+
+/** Execute the product operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param dims The vector of axis which will be reduced.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param reduce_all Whether to reduce all dims, default false.
+*/
+ULTRAINFER_DECL void Prod(const FDTensor &x, FDTensor *out,
+                          const std::vector &dims,
+                          bool keep_dim = false, bool reduce_all = false);
+
+/** Execute the argmax operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param axis The axis which will be reduced.
+    @param output_dtype The data type of output FDTensor, INT64 or INT32,
+    default to INT64.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param flatten Whether to flatten FDTensor to get the argmax index, default
+    false.
+*/
+ULTRAINFER_DECL void ArgMax(const FDTensor &x, FDTensor *out, int64_t axis,
+                            FDDataType output_dtype = FDDataType::INT64,
+                            bool keep_dim = false, bool flatten = false);
+
+/** Execute the argmin operation for input FDTensor along given dims.
+    @param x The input tensor.
+    @param out The output tensor which stores the result.
+    @param axis The axis which will be reduced.
+    @param output_dtype The data type of output FDTensor, INT64 or INT32,
+    default to INT64.
+    @param keep_dim Whether to keep the reduced dims, default false.
+    @param flatten Whether to flatten FDTensor to get the argmin index, default
+    false.
+*/ +ULTRAINFER_DECL void ArgMin(const FDTensor &x, FDTensor *out, int64_t axis, + FDDataType output_dtype = FDDataType::INT64, + bool keep_dim = false, bool flatten = false); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/reduce_functor.h b/libs/ultrainfer/ultrainfer/function/reduce_functor.h new file mode 100755 index 0000000000..f31210095b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/reduce_functor.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/function/eigen.h" +namespace ultrainfer { +namespace function { +//////// Max Functor /////// +struct MaxFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->maximum(dim); + } +}; + +//////// Min Functor /////// +struct MinFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->minimum(dim); + } +}; + +//////// Sum Functor /////// +struct SumFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->sum(dim); + } +}; + +//////// All Functor /////// +struct AllFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->all(dim); + } +}; + +//////// Any Functor /////// +struct AnyFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->any(dim); + } +}; + +//////// Mean Functor /////// +struct MeanFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->mean(dim); + } +}; + +//////// Prod Functor /////// +struct ProdFunctor { + template + void operator()(const Eigen::DefaultDevice &dev, X *x, Y *y, const Dim &dim) { + y->device(dev) = x->prod(dim); + } +}; + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/slice.cc b/libs/ultrainfer/ultrainfer/function/slice.cc new file mode 100755 index 0000000000..3d44fab6b7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/slice.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
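As a quick illustration of the reduction helpers declared in reduce.h above, here is a hedged usage sketch (not part of the patch); it assumes the FP32 data type enum and the SetExternalData wrapper used elsewhere in this patch:

#include <vector>

#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/function/reduce.h"

void ReduceSketch() {
  std::vector<float> data = {1, 5, 3, 2, 4, 6};
  ultrainfer::FDTensor x;
  x.SetExternalData({2, 3}, ultrainfer::FDDataType::FP32, data.data());

  ultrainfer::FDTensor row_max, total, row_argmax;
  // Per-row maximum along axis 1, keeping the reduced axis: shape {2, 1}.
  ultrainfer::function::Max(x, &row_max, {1}, /*keep_dim=*/true);
  // Sum over every element: reduce_all collapses the result to a single value.
  ultrainfer::function::Sum(x, &total, {0, 1}, /*keep_dim=*/false,
                            /*reduce_all=*/true);
  // Index of the per-row maximum along axis 1 (INT64 output by default).
  ultrainfer::function::ArgMax(x, &row_argmax, 1);
}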
+ +#include "ultrainfer/function/slice.h" +#include "ultrainfer/function/eigen.h" + +#include + +namespace ultrainfer { +namespace function { + +std::vector GetSliceDims(const std::vector &in_dims, + const std::vector &axes, + const std::vector &starts, + const std::vector &ends, + std::vector *steps = nullptr) { + std::vector slice_dims(in_dims); + + for (size_t i = 0; i < axes.size(); ++i) { + int64_t axis = axes[i]; + if (in_dims[axis] == -1) { + continue; + } + + int64_t start = starts[i]; + int64_t end = ends[i]; + int64_t step = steps == nullptr ? 1 : (*steps)[i]; + + if (step > 0) { + slice_dims[axis] = (end - start + step - 1) / step; + } else { + slice_dims[axis] = (end - start + step + 1) / step; + } + } + return slice_dims; +} + +void CheckAndUpdateSliceAttrs(const std::vector &in_dims, + const std::vector &axes, + std::vector *starts, + std::vector *ends, + std::vector *steps = nullptr) { + for (size_t i = 0; i < axes.size(); ++i) { + int64_t axis = axes[i]; + FDASSERT(axis < in_dims.size(), + "The axis value should be less than the rank of input, " + "but received axes[%d] = %d, rank of input is %d.", + i, axis, in_dims.size()); + int64_t dim_value = in_dims[axis]; + + if (dim_value > 0) { + int64_t step = steps == nullptr ? 1 : (*steps)[i]; + FDASSERT(step != 0, "Step should not be 0, but received step = %d.", + step); + int64_t start = + (*starts)[i] < 0 ? ((*starts)[i] + dim_value) : (*starts)[i]; + start = (std::max)(start, static_cast(0)); + + int64_t end = + 0 < step && (*ends)[i] < 0 ? ((*ends)[i] + dim_value) : (*ends)[i]; + end = (std::min)(end, dim_value); + + if (step > 0) { + start = (std::min)(start, dim_value); + end = (std::max)(end, static_cast(0)); + FDASSERT(end > start, + "When step > 0, end should be greater than start, but " + "received end = %d, start = %d.", + end, start) + } else { + start = (std::min)(start, dim_value - 1); + if (end < -1) { + end += dim_value; + } + end = (std::max)(end, static_cast(-1)); + FDASSERT(start >= end, + "When step < 0, start should be greater than end, but " + "received start = %d, end = %d.", + start, end); + } + + (*starts)[i] = start; + (*ends)[i] = end; + } else if (dim_value == 0) { + (*starts)[i] = 0; + (*ends)[i] = 0; + } + } +} + +template +void SliceKernel(const FDTensor &x, const std::vector &axes, + const std::vector &starts, + const std::vector &ends, FDTensor *out) { + FDASSERT(starts.size() == axes.size(), + "The size of starts must be equal to the size of axes."); + FDASSERT(ends.size() == axes.size(), + "The size of ends must be equal to the size of axes."); + auto starts_idx = starts; + auto end_idx = ends; + auto in_dims = x.Shape(); + CheckAndUpdateSliceAttrs(in_dims, axes, &starts_idx, &end_idx); + auto slice_dims = GetSliceDims(in_dims, axes, starts, ends); + + auto offsets = Eigen::DSizes(); + auto extents = Eigen::DSizes(); + for (size_t i = 0; i < D; ++i) { + offsets[i] = 0; + extents[i] = slice_dims[i]; + } + for (size_t i = 0; i < axes.size(); ++i) { + offsets[axes[i]] = starts[i]; + } + + out->Allocate(slice_dims, x.Dtype()); + auto in_t = EigenTensor::From(x, in_dims); + auto out_t = EigenTensor::From(*out, slice_dims); + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + out_t.device(dev) = in_t.slice(offsets, extents); +} + +void Slice(const FDTensor &x, const std::vector &axes, + const std::vector &starts, const std::vector &ends, + FDTensor *out) { + FD_VISIT_ALL_TYPES( + x.dtype, "SliceKernel", ([&] { + int rank = x.Shape().size(); + switch (rank) { + case 1: + 
SliceKernel(x, axes, starts, ends, out); + break; + case 2: + SliceKernel(x, axes, starts, ends, out); + break; + case 3: + SliceKernel(x, axes, starts, ends, out); + break; + case 4: + SliceKernel(x, axes, starts, ends, out); + break; + case 5: + SliceKernel(x, axes, starts, ends, out); + break; + case 6: + SliceKernel(x, axes, starts, ends, out); + break; + default: + FDASSERT(false, + "The rank of input should be less than 7, but received %d.", + rank); + } + })); +} + +void Slice(const FDTensor &x, const std::vector &axes, + const std::vector &index, FDTensor *out) { + std::vector ends = index; + for (int i = 0; i < ends.size(); ++i) { + ends[i] += 1; + } + Slice(x, axes, index, ends, out); + for (int i = 0; i < axes.size(); ++i) { + if (out->Shape().size() <= 1) { + break; + } + out->Squeeze(axes[i]); + } +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/slice.h b/libs/ultrainfer/ultrainfer/function/slice.h new file mode 100755 index 0000000000..240b0455fb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/slice.h @@ -0,0 +1,44 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** This operator produces a slice of input along multiple axes. + @param x The input tensor. + @param axes Axes that starts and ends apply to. + @param starts If starts is a list or tuple, the elements of it should be + integers or Tensors with shape [1]. If starts is an Tensor, it should + be an 1-D Tensor. It represents starting indices of corresponding axis + in axes + @param ends If ends is a list or tuple, the elements of it should be + integers or Tensors with shape [1]. If ends is an Tensor, it should + be an 1-D Tensor . It represents ending indices of corresponding axis + in axes. + @param out The output tensor which stores the result. +*/ + +ULTRAINFER_DECL void Slice(const FDTensor &x, const std::vector &axes, + const std::vector &starts, + const std::vector &ends, FDTensor *out); + +ULTRAINFER_DECL void Slice(const FDTensor &x, const std::vector &axes, + const std::vector &index, FDTensor *out); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/softmax.cc b/libs/ultrainfer/ultrainfer/function/softmax.cc new file mode 100755 index 0000000000..7cf9fdf640 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/softmax.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/softmax.h" + +#include + +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/utils/axis_utils.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace function { +template struct ValueClip { + T operator()(const T &x) const { + const T kThreshold = static_cast(-64.); + return x < kThreshold ? kThreshold : x; + } +}; + +template struct SoftmaxEigen { + void operator()(const FDTensor &x, FDTensor *out, int axis_dim) { + constexpr int kBatchDim = 0; + constexpr int kClassDim = 1; + constexpr int kAxisDim = 1; + + auto logits = EigenMatrix::From(x); + auto softmax = EigenMatrix::From(*out); + + const int batch_size = logits.dimension(kBatchDim); + const int num_classes = logits.dimension(kClassDim); + const int num_remain = num_classes / axis_dim; + Eigen::DSizes along_axis(kAxisDim); + Eigen::DSizes batch_classes(batch_size, num_classes); + Eigen::DSizes batch_by_one(batch_size, 1); + Eigen::DSizes one_by_class(1, num_classes); + Eigen::DSizes batch_one_remain(batch_size, 1, num_remain); + Eigen::DSizes one_axis_one(1, axis_dim, 1); + Eigen::DSizes one_axis(1, axis_dim); + Eigen::DSizes batch_axis_remain(batch_size, axis_dim, num_remain); + + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + // For numerical stability, logits should be shifted by maximum number along + // axis, calculate shifted_logits into softmax tensor for memory reuse. 
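+    // That is: softmax(x)_i = exp(x_i - max(x)) / sum_j exp(x_j - max(x)).
+    // Subtracting the per-axis maximum keeps exp() from overflowing and does
+    // not change the result, since the common factor cancels in the
+    // normalization below.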
+ if (num_remain == 1) { + // axis == -1, axis and class in same dimension, calculate along + // class dimension directly for higher performance + softmax.device(dev) = (logits - logits.maximum(along_axis) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)) + .unaryExpr(ValueClip()); + } else { + // axis != -1, class dimension split into (axis, remain), max and sum + // should be calculated along axis dimension + softmax.device(dev) = + (logits.reshape(batch_axis_remain) - logits.reshape(batch_axis_remain) + .maximum(along_axis) + .eval() + .reshape(batch_one_remain) + .broadcast(one_axis_one) + .reshape(batch_axis_remain)) + .reshape(batch_classes) + .unaryExpr(ValueClip()); + } + softmax.device(dev) = softmax.exp(); + softmax.device(dev) = (softmax * softmax.reshape(batch_axis_remain) + .sum(along_axis) + .inverse() + .eval() + .broadcast(one_axis)); + } +}; + +template +void SoftmaxFunctor(const FDTensor &x, FDTensor *out, int axis) { + SoftmaxEigen()(x, out, axis); +} + +template +void SoftmaxKernel(const FDTensor &x, FDTensor *out, int axis) { + const int rank = x.shape.size(); + const int calc_axis = CanonicalAxis(axis, rank); + int axis_dim = x.shape[calc_axis]; + out->Allocate(x.shape, x.dtype); + if (out->Numel() == 0) { + return; + } + const int n = SizeToAxis(calc_axis, x.shape); + const int d = SizeFromAxis(calc_axis, x.shape); + // Reshape to 2d tensor + + FDTensor x_2d, out_2d; + x_2d.SetExternalData({n, d}, x.dtype, const_cast(x.Data())); + out_2d.SetExternalData({n, d}, out->dtype, out->Data()); + + SoftmaxFunctor(x_2d, &out_2d, axis_dim); +} + +void Softmax(const FDTensor &x, FDTensor *out, int axis) { + FDASSERT( + std::abs(axis) < x.shape.size(), + "The absolute given axis should be smaller than the input's " + "dimension. Expected absolute axis is smaller than %lu, but receive %d.", + x.shape.size(), std::abs(axis)); + // Note(zhoushunjie): The FDTensor out may equal to FDTensor x, so firstly we + // use out_temp to get the softmax result, then we move the out_temp to out. + FDTensor out_tmp; + FD_VISIT_FLOAT_TYPES(x.dtype, "SoftmaxKernel", + ([&] { SoftmaxKernel(x, &out_tmp, axis); })); + *out = std::move(out_tmp); +} +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/softmax.h b/libs/ultrainfer/ultrainfer/function/softmax.h new file mode 100755 index 0000000000..29a1258e98 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/softmax.h @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { +/** Excute the softmax operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param axis The axis to be computed softmax value. 
+*/ +ULTRAINFER_DECL void Softmax(const FDTensor &x, FDTensor *out, int axis = -1); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/sort.cc b/libs/ultrainfer/ultrainfer/function/sort.cc new file mode 100755 index 0000000000..8f062883dc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/sort.cc @@ -0,0 +1,120 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/sort.h" +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/function/transpose.h" +#include +#include +#include + +namespace ultrainfer { +namespace function { + +template +static void FullSort(Type input_height, Type input_width, int input_dim, + const FDTensor *input, FDTensor *out, FDTensor *indices, + bool descending) { + out->Allocate(input->Shape(), input->Dtype()); + indices->Allocate(input->Shape(), TypeToDataType::dtype); + + T *t_out = reinterpret_cast(out->Data()); + Type *t_indices = reinterpret_cast(indices->Data()); + + for (Type i = 0; i < input_height; ++i) { + std::vector> col_vec; + col_vec.reserve(input_width); + if (input_dim == 1) { + auto e_input = EigenVector::Flatten(*input); + for (Type j = 0; j < input_width; ++j) { + col_vec.push_back(std::pair(e_input(j), j)); + } + } else { + auto e_input = EigenMatrix::Reshape(*input, input_dim - 1); + for (Type j = 0; j < input_width; ++j) { + col_vec.push_back(std::pair(e_input(i, j), j)); + } + } + std::sort(col_vec.begin(), col_vec.end(), + [&](const std::pair &l, const std::pair &r) { + if (descending) + return (std::isnan(static_cast(l.first)) && + !std::isnan(static_cast(r.first))) || + (l.first > r.first); + else + return (!std::isnan(static_cast(l.first)) && + std::isnan(static_cast(r.first))) || + (l.first < r.first); + }); + + for (Type j = 0; j < input_width; ++j) { + t_out[i * input_width + j] = col_vec[j].first; + t_indices[i * input_width + j] = col_vec[j].second; + } + } +} + +template +void SortKernel(const FDTensor &x, FDTensor *out, FDTensor *indices, + FDDataType indices_type, bool descending, int axis) { + auto input_shape = x.Shape(); + int rank = input_shape.size(); + axis = (axis < 0) ? 
(rank + axis) : axis; + // Do full sort + if (axis == -1 || axis + 1 == rank) { + int64_t numel = x.Numel(); + int64_t input_width = input_shape[axis]; + int64_t input_height = numel / input_width; + FD_VISIT_INT_TYPES(indices_type, "FullSort", ([&] { + FullSort(input_height, input_width, rank, + &x, out, indices, descending); + })); + } else { + // If not full sort do transpose + std::vector trans; + for (int i = 0; i < axis; i++) { + trans.push_back(i); + } + trans.push_back(rank - 1); + for (int i = axis + 1; i < rank - 1; i++) { + trans.push_back(i); + } + trans.push_back(axis); + + FDTensor trans_inp; + Transpose(x, &trans_inp, trans); + int64_t numel = x.Numel(); + int64_t input_width = input_shape[axis]; + int64_t input_height = numel / input_width; + FD_VISIT_INT_TYPES(indices_type, "FullSort", ([&] { + FullSort(input_height, input_width, rank, + &trans_inp, out, indices, + descending); + })); + // transpose back + Transpose(*out, out, trans); + Transpose(*indices, indices, trans); + } +} + +void Sort(const FDTensor &x, FDTensor *out, FDTensor *indices, int axis, + bool descending, FDDataType indices_type) { + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "SortKernel", ([&] { + SortKernel(x, out, indices, indices_type, + descending, axis); + })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/sort.h b/libs/ultrainfer/ultrainfer/function/sort.h new file mode 100755 index 0000000000..fea3b8ce82 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/sort.h @@ -0,0 +1,47 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** + * @brief Performs sorting on the input tensor along the given axis and outputs + * two tensors, Output(Out) and Output(Indices). They reserve the same + * shape with Input(X), and Output(Out) represents the sorted tensor + * while Output(Indices) gives the sorted order along the given axis + * Attr(axis). + * @param x The input of sort + * @param out The sorted tensor of sort op, with the same shape as + * x + * @param indices The indices of a tensor giving the sorted order, with + * the same shape as x + * @param axis The axis along which to sort the tensor. + * When axis < 0, the actual axis will be the |axis|'th + * counting backwards + * @param descending The descending attribute is a flag to tell + * algorithm how to sort the input data. 
+ * If descending is true, will sort by descending order, + * else if false, sort by ascending order + * @param indices_type The data type of indices, default to int64 + */ +ULTRAINFER_DECL void Sort(const FDTensor &x, FDTensor *out, FDTensor *indices, + int axis = 0, bool descending = false, + FDDataType indices_type = FDDataType::INT64); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/split.cc b/libs/ultrainfer/ultrainfer/function/split.cc new file mode 100755 index 0000000000..be70ff115c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/split.cc @@ -0,0 +1,160 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/split.h" +#include "ultrainfer/utils/utils.h" +#include + +namespace ultrainfer { +namespace function { + +/* + * All tensors' dimension should be the same and the values of + * each dimension must be the same, except the axis dimension. + */ +template struct SplitFunctor { +public: + void operator()(const FDTensor &input, + const std::vector &ref_inputs, int axis, + std::vector *outputs) { + if (input.Numel() == 0) { + return; + } + + size_t num = outputs->size(); + + int input_rows = 1; + auto dim_0 = ref_inputs[0]->Shape(); + for (int i = 0; i < axis; ++i) { + input_rows *= dim_0[i]; + } + + int input_cols = 0; + + std::vector output_cols(outputs->size()); + for (size_t i = 0; i < num; ++i) { + int t_cols = ref_inputs[i]->Numel() / input_rows; + input_cols += t_cols; + output_cols[i] = t_cols; + } + + // computation + for (int k = 0; k < input_rows; ++k) { + const T *src_ptr = + reinterpret_cast(input.Data()) + k * input_cols; + int col_idx = 0; + for (size_t j = 0; j < num; ++j) { + int col_len = output_cols[j]; + auto *out_tensor = &(outputs->at(j)); + if (out_tensor != nullptr) { + T *dst_ptr = reinterpret_cast(out_tensor->Data()) + k * col_len; + std::memcpy(dst_ptr, src_ptr + col_idx, sizeof(T) * col_len); + } + col_idx += col_len; + } + } + } +}; + +inline int GetSplitAxisValue(const FDTensor &x, int axis) { + int rank = x.Shape().size(); + FDASSERT(axis >= -rank && axis < rank, + "The axis is expected to be in range of [%d, %d), but got %d", -rank, + rank, axis); + if (axis < 0) { + axis = axis + rank; + } + return axis; +} + +void CreateSplitOutputs(const FDTensor &x, + const std::vector §ions_data, + std::vector *outs, int axis) { + axis = GetSplitAxisValue(x, axis); + auto input_axis_dim = x.Shape().at(axis); + std::vector sections_vec; + const int unknow_dim_val = -1; + int unknow_dim_idx = -1; + int num_of_unknow = 0; + int sum_of_section = 0; + + for (size_t i = 0; i < sections_data.size(); ++i) { + sections_vec.push_back(sections_data[i]); + if (sections_data[i] == unknow_dim_val) { + num_of_unknow++; + unknow_dim_idx = i; + } else { + sum_of_section += sections_data[i]; + } + } + + FDASSERT(num_of_unknow <= 1, + "Only one dimension value of Attr(num_or_sections) " + "in SplitOp can be -1. 
" + "But received Attr(num_or_sections) = [%s].", + Str(sections_data).c_str()); + if (unknow_dim_idx != -1) { + // for example, input shape = [4 ,5], axis = 1, sections = [2, 3, -1]. + // input_axis_dim = 5, sum_of_sections = 5. + // the following check will fail. + FDASSERT(sum_of_section < input_axis_dim, + "Sum of Attr(num_or_sections) other than unknown section " + "must be less than the input's " + "size " + "along the split dimension. But received Attr(num_or_sections) " + "= [%s], input(X)'s shape = [%s], Attr(dim) = %d.", + Str(sections_data).c_str(), Str(x.Shape()).c_str(), axis); + sections_vec[unknow_dim_idx] = input_axis_dim - sum_of_section; + } else { + FDASSERT(sum_of_section == input_axis_dim, + "Sum of Attr(num_or_sections) must be equal to the input's " + "size " + "along the split dimension. But received Attr(num_or_sections)" + " = [%s], input(X)'s shape = [%s], Attr(dim) = %d.", + Str(sections_data).c_str(), Str(x.Shape()).c_str(), axis); + } + // fill out dims + std::vector> out_dims(sections_vec.size(), x.Shape()); + for (size_t i = 0; i < sections_vec.size(); ++i) { + out_dims[i][axis] = sections_vec[i]; + } + for (size_t i = 0; i < sections_vec.size(); ++i) { + (*outs)[i].Allocate(out_dims[i], x.Dtype()); + } +} + +template +void SplitKernel(const FDTensor &x, const std::vector §ion, + std::vector *outs, int axis) { + size_t out_number = section.size(); + outs->resize(out_number); + CreateSplitOutputs(x, section, outs, axis); + + std::vector shape_refer; + for (size_t j = 0; j < outs->size(); ++j) { + shape_refer.emplace_back(&((*outs)[j])); + } + SplitFunctor functor; + functor(x, shape_refer, axis, outs); +} + +void Split(const FDTensor &x, const std::vector &num_or_sections, + std::vector *out, int axis) { + FD_VISIT_ALL_TYPES(x.Dtype(), "Split", ([&] { + SplitKernel(x, num_or_sections, out, axis); + })); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/split.h b/libs/ultrainfer/ultrainfer/function/split.h new file mode 100755 index 0000000000..1b3a2063ec --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/split.h @@ -0,0 +1,36 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Split the input tensor into multiple sub-Tensors. + @param x The input tensor. + @param num_or_sections f num_or_sections is an int, then num_or_sections + indicates the number of equal sized sub-Tensors that the x will + be divided into. + @param out The output vector tensor which stores the result. + @param axis Axis which will be splitted. 
+*/ + +ULTRAINFER_DECL void Split(const FDTensor &x, + const std::vector &num_or_sections, + std::vector *out, int axis = 0); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/tile.cc b/libs/ultrainfer/ultrainfer/function/tile.cc new file mode 100755 index 0000000000..3406d690f1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/tile.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/function/tile.h" +#include "ultrainfer/function/eigen.h" + +namespace ultrainfer { +namespace function { + +template +void TileFunctor(const FDTensor &x, + const std::vector &origin_repeat_times, + FDTensor *out) { + auto x_shape = x.Shape(); + auto repeat_times = origin_repeat_times; + for (size_t i = 0; i < repeat_times.size(); ++i) { + FDASSERT(repeat_times[i] > 0, + "All elements of the input 'repeat_times' " + "for tile op must be positive integers, but " + "the value received is %d.", + repeat_times[i]); + } + if (repeat_times.size() < x_shape.size()) { + int diff = x_shape.size() - repeat_times.size(); + repeat_times.insert(repeat_times.begin(), diff, 1); + } else { + int diff = repeat_times.size() - x_shape.size(); + x_shape.insert(x_shape.begin(), diff, 1); + } + FDASSERT(repeat_times.size() == x_shape.size(), + "The rank (%d) of the input 'x' and the rank (%d) of the input " + "'repeat_times' for tile op must match after promotion.", + x_shape.size(), repeat_times.size()); + + if (Rank == 0) { + // Deep copy + *out = x; + return; + } + + FDTensor out_tmp; + Eigen::DSizes bcast_dims; + for (size_t i = 0; i < repeat_times.size(); ++i) { + bcast_dims[i] = repeat_times[i]; + } + + std::vector out_shape(x_shape); + for (size_t i = 0; i < repeat_times.size(); ++i) { + out_shape[i] *= repeat_times[i]; + } + + out_tmp.Allocate(out_shape, x.Dtype()); + auto eigen_x = EigenTensor::From(x, x_shape); + auto eigen_out = EigenTensor::From(out_tmp, out_shape); + + const auto &dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + eigen_out.device(dev) = eigen_x.broadcast(bcast_dims); + + *out = std::move(out_tmp); +} + +template +void TileKernel(const FDTensor &x, const std::vector &repeat_times, + FDTensor *out) { + auto rank = x.Shape().size(); + auto repeat_times_size = repeat_times.size(); + rank = (std::max)(rank, repeat_times_size); + switch (rank) { + case 0: + *out = x; + break; + case 1: + TileFunctor(x, repeat_times, out); + break; + case 2: + TileFunctor(x, repeat_times, out); + break; + case 3: + TileFunctor(x, repeat_times, out); + break; + case 4: + TileFunctor(x, repeat_times, out); + break; + case 5: + TileFunctor(x, repeat_times, out); + break; + case 6: + TileFunctor(x, repeat_times, out); + break; + } +} + +void Tile(const FDTensor &x, const std::vector &repeat_times, + FDTensor *out) { + FD_VISIT_ALL_TYPES(x.dtype, "TileKernel", + ([&] { TileKernel(x, repeat_times, out); })); +} + +} // namespace function +} // namespace 
ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/tile.h b/libs/ultrainfer/ultrainfer/function/tile.h new file mode 100755 index 0000000000..9ba545d894 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/tile.h @@ -0,0 +1,36 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { +namespace function { + +/** Construct a new Tensor by repeating x the number of times given by + ** repeat_times. After tiling, the value of the i’th dimension of the + ** output is equal to x.shape[i]*repeat_times[i]. Both the number of + ** dimensions of x and the number of elements in repeat_times should + ** be less than or equal to 6.Support all data types. + @param x The input tensor. + @param repeat_times The lower bound + @param out The output tensor which stores the result. +*/ +ULTRAINFER_DECL void Tile(const FDTensor &x, + const std::vector &repeat_times, + FDTensor *out); + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/transpose.cc b/libs/ultrainfer/ultrainfer/function/transpose.cc new file mode 100755 index 0000000000..e33c986140 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/transpose.cc @@ -0,0 +1,123 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
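A hedged usage sketch (not part of the patch) for the Softmax and Tile helpers declared in softmax.h and tile.h above, under the same FP32/SetExternalData assumptions as the earlier sketches:

#include <vector>

#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/function/softmax.h"
#include "ultrainfer/function/tile.h"

void SoftmaxTileSketch() {
  std::vector<float> logits = {1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f};
  ultrainfer::FDTensor x;
  x.SetExternalData({2, 3}, ultrainfer::FDDataType::FP32, logits.data());

  // Row-wise probabilities: softmax along the last axis (the default).
  ultrainfer::FDTensor probs;
  ultrainfer::function::Softmax(x, &probs);

  // Repeat the 2 x 3 tensor twice along each dimension, giving shape {4, 6}.
  ultrainfer::FDTensor tiled;
  ultrainfer::function::Tile(x, {2, 2}, &tiled);
}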
+ +#include "ultrainfer/function/transpose.h" +#include "ultrainfer/function/eigen.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace function { +template struct TransposeNormalKernel { + void operator()(const FDTensor &in, FDTensor *out, + const std::vector &axis) { + const int rank = axis.size(); + auto in_stride = GetStride(in.shape); + auto out_stride = GetStride(out->shape); + const T *in_ptr = reinterpret_cast(in.Data()); + T *out_ptr = reinterpret_cast(out->Data()); + + auto transpose_helper = [&](int64_t beg, int64_t end) { + for (int64_t out_idx = beg; out_idx < end; ++out_idx) { + int64_t in_idx = 0; + int64_t tmp_idx = out_idx; + // calculate the input index + for (int i = 0; i < rank; ++i) { + const int64_t coordinate = tmp_idx / out_stride[i]; + tmp_idx -= coordinate * out_stride[i]; + in_idx += coordinate * in_stride[axis[i]]; + } + out_ptr[out_idx] = in_ptr[in_idx]; + } + }; + transpose_helper(0, out->Numel()); + } +}; + +template struct TransposeKernelImpl { + void operator()(const FDTensor &in, FDTensor *out, + const std::vector &axis) { + Eigen::array permute; + for (int i = 0; i < Rank; i++) { + permute[i] = axis[i]; + } + + auto &place = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_in = EigenTensor::From(in); + auto eigen_out = EigenTensor::From(*out); + eigen_out.device(place) = eigen_in.shuffle(permute); + } +}; + +template +void TransposeKernel(const FDTensor &x, FDTensor *out, + const std::vector &axis) { + int rank = axis.size(); + switch (rank) { + case 1: + TransposeKernelImpl trans1; + trans1(x, out, axis); + break; + case 2: + TransposeKernelImpl trans2; + trans2(x, out, axis); + break; + case 3: + TransposeKernelImpl trans3; + trans3(x, out, axis); + break; + case 4: + TransposeKernelImpl trans4; + trans4(x, out, axis); + break; + default: + // for rank >= 4 situation + TransposeNormalKernel trans_normal; + trans_normal(x, out, axis); + } +} + +void Transpose(const FDTensor &x, FDTensor *out, + const std::vector &dims) { + size_t dims_size = dims.size(); + FDASSERT(dims_size == x.shape.size(), + "The input tensor's dimension should be equal to the dims's size. " + "Expect dims size is %lu, but receive %lu.", + x.shape.size(), dims_size); + std::vector count(dims_size, 0); + for (size_t i = 0; i < dims_size; i++) { + FDASSERT(dims[i] >= 0, + "The dims should be greater than or equal to 0, but receive %lld.", + dims[i]); + FDASSERT(dims[i] < static_cast(dims_size) && ++count[dims[i]] == 1, + "Each element of Attribute axis should be a unique value range " + "from 0 to (dims - 1), where the dims is the axis's size, unique " + "value means this axis value can appear only once. "); + } + std::vector out_dims(dims_size); + for (size_t i = 0; i < dims_size; i++) { + out_dims[i] = x.shape[dims[i]]; + } + + // Note(zhoushunjie): The FDTensor out may equal to FDTensor x, so firstly we + // use out_temp to get the transposed result, then we move the out_temp to + // out. + FDTensor out_temp; + out_temp.Allocate(out_dims, x.dtype); + FD_VISIT_ALL_TYPES(x.dtype, "TransposeKernel", + ([&] { TransposeKernel(x, &out_temp, dims); })); + *out = std::move(out_temp); +} + +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/function/transpose.h b/libs/ultrainfer/ultrainfer/function/transpose.h new file mode 100755 index 0000000000..687f7603de --- /dev/null +++ b/libs/ultrainfer/ultrainfer/function/transpose.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" + +namespace ultrainfer { + +/** \brief All C++ FDTensor Operation APIs are defined inside this namespace + * + */ +namespace function { +/** Excute the transpose operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param dims The vector of axis which the input tensor will transpose. +*/ +ULTRAINFER_DECL void Transpose(const FDTensor &x, FDTensor *out, + const std::vector &dims); +} // namespace function +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/pipeline.h b/libs/ultrainfer/ultrainfer/pipeline.h new file mode 100755 index 0000000000..6568e1a106 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pipeline.h @@ -0,0 +1,21 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "ultrainfer/core/config.h" +#ifdef ENABLE_VISION +#include "ultrainfer/pipeline/pptinypose/pipeline.h" +#endif + +#include "ultrainfer/vision/visualize/visualize.h" diff --git a/libs/ultrainfer/ultrainfer/pipeline/pipeline_pybind.cc b/libs/ultrainfer/ultrainfer/pipeline/pipeline_pybind.cc new file mode 100755 index 0000000000..b42fb7e2c5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pipeline/pipeline_pybind.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
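A hedged usage sketch (not part of the patch) for the Transpose helper declared in transpose.h above, with the same FP32/SetExternalData assumptions as the earlier sketches:

#include <vector>

#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/function/transpose.h"

void TransposeSketch() {
  std::vector<float> data = {1, 2, 3, 4, 5, 6};
  ultrainfer::FDTensor x;
  x.SetExternalData({2, 3}, ultrainfer::FDDataType::FP32, data.data());

  // dims must be a permutation of the input axes; {1, 0} swaps rows and
  // columns, producing a {3, 2} tensor.
  ultrainfer::FDTensor out;
  ultrainfer::function::Transpose(x, &out, {1, 0});
}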
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPTinyPosePipeline(pybind11::module &m); + +void BindPipeline(pybind11::module &m) { BindPPTinyPosePipeline(m); } +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.cc b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.cc new file mode 100755 index 0000000000..7b806fe947 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.cc @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pipeline/pptinypose/pipeline.h" + +namespace ultrainfer { +namespace pipeline { +PPTinyPose::PPTinyPose( + ultrainfer::vision::detection::PicoDet *det_model, + ultrainfer::vision::keypointdetection::PPTinyPose *pptinypose_model) + : detector_(det_model), pptinypose_model_(pptinypose_model) {} + +bool PPTinyPose::Detect(cv::Mat *img, + ultrainfer::vision::DetectionResult *detection_res) { + if (!detector_->Predict(img, detection_res)) { + FDERROR << "There's a error while detectiong human box in image." + << std::endl; + return false; + } + return true; +} + +bool PPTinyPose::KeypointDetect( + cv::Mat *img, ultrainfer::vision::KeyPointDetectionResult *result, + ultrainfer::vision::DetectionResult &detection_result) { + if (!pptinypose_model_->Predict(img, result, detection_result)) { + FDERROR << "There's a error while detecting keypoint in image " + << std::endl; + return false; + } + return true; +} + +bool PPTinyPose::Predict(cv::Mat *img, + ultrainfer::vision::KeyPointDetectionResult *result) { + result->Clear(); + ultrainfer::vision::DetectionResult detection_res; + if (nullptr != detector_ && !Detect(img, &detection_res)) { + FDERROR << "Failed to detect image." << std::endl; + return false; + } + ultrainfer::vision::DetectionResult filter_detection_res; + for (size_t i = 0; i < detection_res.boxes.size(); ++i) { + if (detection_res.scores[i] > detection_model_score_threshold) { + filter_detection_res.boxes.push_back(detection_res.boxes[i]); + filter_detection_res.scores.push_back(detection_res.scores[i]); + filter_detection_res.label_ids.push_back(detection_res.label_ids[i]); + } + } + if (nullptr != pptinypose_model_ && + !KeypointDetect(img, result, filter_detection_res)) { + FDERROR << "Failed to detect keypoint in image " << std::endl; + return false; + } + return true; +}; + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.h b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.h new file mode 100755 index 0000000000..c3a8dcc7b1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pipeline.h @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/ultrainfer_model.h"
+#include "ultrainfer/vision/common/result.h"
+#include "ultrainfer/vision/detection/ppdet/model.h"
+#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose.h"
+
+namespace ultrainfer {
+/** \brief All pipeline model APIs are defined inside this namespace
+ *
+ */
+namespace pipeline {
+
+/*! @brief PPTinyPose pipeline object, used to load a detection model together
+ * with a PP-TinyPose keypoint model
+ */
+class ULTRAINFER_DECL PPTinyPose {
+public:
+  /** \brief Set initialized detection model object and pptinypose model object
+   *
+   * \param[in] det_model Initialized detection model object
+   * \param[in] pptinypose_model Initialized pptinypose model object
+   */
+  PPTinyPose(
+      ultrainfer::vision::detection::PicoDet *det_model,
+      ultrainfer::vision::keypointdetection::PPTinyPose *pptinypose_model);
+
+  /** \brief Predict the keypoint detection result for an input image
+   *
+   * \param[in] img The input image data, comes from cv::imread()
+   * \param[in] result The output keypoint detection result will be written to
+   * this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(cv::Mat *img,
+                       ultrainfer::vision::KeyPointDetectionResult *result);
+
+  /* \brief The score threshold for the detection model to filter bboxes
+   * before they are fed to the pptinypose model
+   */
+  float detection_model_score_threshold = 0;
+
+protected:
+  ultrainfer::vision::detection::PicoDet *detector_ = nullptr;
+  ultrainfer::vision::keypointdetection::PPTinyPose *pptinypose_model_ =
+      nullptr;
+
+  virtual bool Detect(cv::Mat *img,
+                      ultrainfer::vision::DetectionResult *result);
+  virtual bool
+  KeypointDetect(cv::Mat *img,
+                 ultrainfer::vision::KeyPointDetectionResult *result,
+                 ultrainfer::vision::DetectionResult &detection_result);
+};
+
+} // namespace pipeline
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pptinyposepipeline_pybind.cc b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pptinyposepipeline_pybind.cc
new file mode 100755
index 0000000000..7c6f3f9610
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/pipeline/pptinypose/pptinyposepipeline_pybind.cc
@@ -0,0 +1,36 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
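+
+// pybind11 bindings for pipeline::PPTinyPose: constructor, predict(), and the
+// detection_model_score_threshold attribute.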
+#include "ultrainfer/pybind/main.h" +#include + +namespace ultrainfer { +void BindPPTinyPosePipeline(pybind11::module &m) { + pybind11::class_(m, "PPTinyPose") + + .def( + pybind11::init()) + .def("predict", + [](pipeline::PPTinyPose &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::KeyPointDetectionResult res; + self.Predict(&mat, &res); + return res; + }) + + .def_readwrite("detection_model_score_threshold", + &pipeline::PPTinyPose::detection_model_score_threshold); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/pybind/fastdeploy_model.cc b/libs/ultrainfer/ultrainfer/pybind/fastdeploy_model.cc new file mode 100755 index 0000000000..82a526e9ed --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pybind/fastdeploy_model.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/ultrainfer_model.h" + +namespace ultrainfer { + +void BindFDModel(pybind11::module &m) { + pybind11::class_(m, "UltraInferModel") + .def(pybind11::init<>(), "Default Constructor") + .def("model_name", &UltraInferModel::ModelName) + .def("num_inputs_of_runtime", &UltraInferModel::NumInputsOfRuntime) + .def("num_outputs_of_runtime", &UltraInferModel::NumOutputsOfRuntime) + .def("input_info_of_runtime", &UltraInferModel::InputInfoOfRuntime) + .def("output_info_of_runtime", &UltraInferModel::OutputInfoOfRuntime) + .def("enable_record_time_of_runtime", + &UltraInferModel::EnableRecordTimeOfRuntime) + .def("disable_record_time_of_runtime", + &UltraInferModel::DisableRecordTimeOfRuntime) + .def("print_statis_info_of_runtime", + &UltraInferModel::PrintStatisInfoOfRuntime) + .def("get_profile_time", &UltraInferModel::GetProfileTime) + .def("initialized", &UltraInferModel::Initialized) + .def_readwrite("runtime_option", &UltraInferModel::runtime_option) + .def_readwrite("valid_cpu_backends", &UltraInferModel::valid_cpu_backends) + .def_readwrite("valid_gpu_backends", + &UltraInferModel::valid_gpu_backends); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/pybind/fd_tensor.cc b/libs/ultrainfer/ultrainfer/pybind/fd_tensor.cc new file mode 100755 index 0000000000..912748acbf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pybind/fd_tensor.cc @@ -0,0 +1,293 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
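+
+// pybind11 bindings for FDTensor, including numpy and DLPack interoperability
+// helpers.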
+ +#include + +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +DLDataType FDToDlpackType(FDDataType fd_dtype) { + DLDataType dl_dtype; + DLDataTypeCode dl_code; + + // Number of bits required for the data type. + size_t dt_size = 0; + + dl_dtype.lanes = 1; + switch (fd_dtype) { + case FDDataType::BOOL: + dl_code = DLDataTypeCode::kDLInt; + dt_size = 1; + break; + case FDDataType::UINT8: + dl_code = DLDataTypeCode::kDLUInt; + dt_size = 8; + break; + case FDDataType::INT8: + dl_code = DLDataTypeCode::kDLInt; + dt_size = 8; + break; + case FDDataType::INT16: + dl_code = DLDataTypeCode::kDLInt; + dt_size = 16; + break; + case FDDataType::INT32: + dl_code = DLDataTypeCode::kDLInt; + dt_size = 32; + break; + case FDDataType::INT64: + dl_code = DLDataTypeCode::kDLInt; + dt_size = 64; + break; + case FDDataType::FP16: + dl_code = DLDataTypeCode::kDLFloat; + dt_size = 16; + break; + case FDDataType::FP32: + dl_code = DLDataTypeCode::kDLFloat; + dt_size = 32; + break; + case FDDataType::FP64: + dl_code = DLDataTypeCode::kDLFloat; + dt_size = 64; + break; + + default: + FDASSERT(false, "Convert to DlPack, FDType \"%s\" is not supported.", + Str(fd_dtype).c_str()); + } + + dl_dtype.code = dl_code; + dl_dtype.bits = dt_size; + return dl_dtype; +} + +FDDataType DlpackToFDType(const DLDataType &data_type) { + FDASSERT(data_type.lanes == 1, "FDTensor does not support dlpack lanes != 1") + + if (data_type.code == DLDataTypeCode::kDLFloat) { + if (data_type.bits == 16) { + return FDDataType::FP16; + } else if (data_type.bits == 32) { + return FDDataType::FP32; + } else if (data_type.bits == 64) { + return FDDataType::FP64; + } + } + + if (data_type.code == DLDataTypeCode::kDLInt) { + if (data_type.bits == 8) { + return FDDataType::INT8; + } else if (data_type.bits == 16) { + return FDDataType::INT16; + } else if (data_type.bits == 32) { + return FDDataType::INT32; + } else if (data_type.bits == 64) { + return FDDataType::INT64; + } else if (data_type.bits == 1) { + return FDDataType::BOOL; + } + } + + if (data_type.code == DLDataTypeCode::kDLUInt) { + if (data_type.bits == 8) { + return FDDataType::UINT8; + } + } + + return FDDataType::UNKNOWN1; +} + +void DeleteUnusedDltensor(PyObject *dlp) { + if (PyCapsule_IsValid(dlp, "dltensor")) { + DLManagedTensor *dl_managed_tensor = + static_cast(PyCapsule_GetPointer(dlp, "dltensor")); + dl_managed_tensor->deleter(dl_managed_tensor); + } +} + +pybind11::capsule FDTensorToDLPack(FDTensor &fd_tensor) { + DLManagedTensor *dlpack_tensor = new DLManagedTensor; + dlpack_tensor->dl_tensor.ndim = fd_tensor.shape.size(); + dlpack_tensor->dl_tensor.byte_offset = 0; + dlpack_tensor->dl_tensor.data = fd_tensor.MutableData(); + dlpack_tensor->dl_tensor.shape = &(fd_tensor.shape[0]); + dlpack_tensor->dl_tensor.strides = nullptr; + dlpack_tensor->manager_ctx = &fd_tensor; + dlpack_tensor->deleter = [](DLManagedTensor *m) { + if (m->manager_ctx == nullptr) { + return; + } + + FDTensor *tensor_ptr = reinterpret_cast(m->manager_ctx); + pybind11::handle tensor_handle = pybind11::cast(tensor_ptr); + tensor_handle.dec_ref(); + free(m); + }; + + pybind11::handle tensor_handle = pybind11::cast(&fd_tensor); + + // Increase the reference count by one to make sure that the DLPack + // represenation doesn't become invalid when the tensor object goes out of + // scope. 
+ tensor_handle.inc_ref(); + + dlpack_tensor->dl_tensor.dtype = FDToDlpackType(fd_tensor.dtype); + + dlpack_tensor->dl_tensor.device.device_id = fd_tensor.device_id; + if (fd_tensor.device == Device::GPU) { + if (fd_tensor.is_pinned_memory) { + dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCUDAHost; + } else { + dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCUDA; + } + } else { + dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCPU; + } + + return pybind11::capsule(static_cast(dlpack_tensor), "dltensor", + &DeleteUnusedDltensor); +} + +FDTensor FDTensorFromDLPack(const std::string &name, + const pybind11::capsule &dlpack_tensor) { + DLManagedTensor *dl_managed_tensor = + static_cast(dlpack_tensor.get_pointer()); + + void *memory_ptr = dl_managed_tensor->dl_tensor.data; + memory_ptr = reinterpret_cast(memory_ptr) + + dl_managed_tensor->dl_tensor.byte_offset; + + int64_t *strides = dl_managed_tensor->dl_tensor.strides; + + int ndim = dl_managed_tensor->dl_tensor.ndim; + std::vector dims(dl_managed_tensor->dl_tensor.shape, + dl_managed_tensor->dl_tensor.shape + ndim); + + // Check if the input is contiguous and in C order + if (strides != nullptr) { + int64_t calculated_stride{1}; + bool is_contiguous_c_order = true; + for (size_t i = 1; i < dims.size(); i++) { + if (strides[ndim - i] != calculated_stride) { + is_contiguous_c_order = false; + break; + } + + calculated_stride *= dims[ndim - i]; + } + + FDASSERT(is_contiguous_c_order, + "DLPack tensor is not contiguous. Only contiguous DLPack " + "tensors that are stored in C-Order are supported."); + } + + Device device; + int32_t device_id = -1; + bool is_pinned_memory = false; + switch (dl_managed_tensor->dl_tensor.device.device_type) { + case DLDeviceType::kDLCUDA: + device = Device::GPU; + device_id = dl_managed_tensor->dl_tensor.device.device_id; + break; + case DLDeviceType::kDLCPU: + device = Device::CPU; + break; + case DLDeviceType::kDLCUDAHost: + device = Device::CPU; + is_pinned_memory = true; + break; + default: + FDASSERT(false, + ("DLDevice type " + + std::to_string(dl_managed_tensor->dl_tensor.device.device_type) + + " is not support by Python backend.") + .c_str()); + break; + } + + FDDataType dtype = DlpackToFDType(dl_managed_tensor->dl_tensor.dtype); + + PyCapsule_SetName(dlpack_tensor.ptr(), "used_dlpack"); + FDTensor fd_tensor(name); + fd_tensor.SetExternalData(dims, dtype, memory_ptr, device, device_id); + fd_tensor.is_pinned_memory = is_pinned_memory; + return fd_tensor; +} + +void BindFDTensor(pybind11::module &m) { + pybind11::class_(m, "FDTensor") + .def(pybind11::init<>(), "Default Constructor") + .def_readwrite("name", &FDTensor::name) + .def_readonly("shape", &FDTensor::shape) + .def_readonly("dtype", &FDTensor::dtype) + .def_readonly("device", &FDTensor::device) + .def("numpy", [](FDTensor &self) { return TensorToPyArray(self); }) + .def("data", &FDTensor::MutableData) + .def("from_numpy", + [](FDTensor &self, pybind11::array &pyarray, + bool share_buffer = false) { + PyArrayToTensor(pyarray, &self, share_buffer); + }) + .def("from_external_data", + [](const std::string &name, size_t data_addr, + const std::vector &shape, const std::string &data_type, + const std::string &data_place, int device_id) { + auto fd_data_type = FDDataType::UNKNOWN1; + if (data_type == "FP32") { + fd_data_type = FDDataType::FP32; + } else if (data_type == "FP16") { + fd_data_type = FDDataType::FP16; + } else if (data_type == "INT32") { + fd_data_type = FDDataType::INT32; + } else if 
(data_type == "INT64") {
+               fd_data_type = FDDataType::INT64;
+             } else {
+               FDASSERT(false,
+                        "FDTensor.from_external_data, datatype \"%s\" is not "
+                        "supported.",
+                        data_type.c_str());
+             }
+
+             Device fd_data_place;
+             bool copy = false;
+             if (data_place.find("gpu") != data_place.npos) {
+               fd_data_place = Device::GPU;
+             } else if (data_place.find("cpu") != data_place.npos) {
+               copy = true;
+               fd_data_place = Device::CPU;
+             } else {
+               FDASSERT(false,
+                        ("Device type " + data_place +
+                         " is not supported by FDTensor.from_external_data.")
+                            .c_str());
+             }
+             void *data_ptr = nullptr;
+             data_ptr = reinterpret_cast(data_addr);
+             FDTensor fd_tensor(name);
+             fd_tensor.SetData(shape, fd_data_type,
+                               static_cast(data_ptr), copy,
+                               fd_data_place, device_id);
+             return fd_tensor;
+           })
+      .def("to_dlpack", &FDTensorToDLPack)
+      .def("from_dlpack", &FDTensorFromDLPack)
+      .def("print_info", &FDTensor::PrintInfo);
+}
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/pybind/main.cc.in b/libs/ultrainfer/ultrainfer/pybind/main.cc.in
new file mode 100755
index 0000000000..83b28f4f67
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/pybind/main.cc.in
@@ -0,0 +1,181 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
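+
+// Template for the pybind11 module entry point; @PY_LIBRARY_NAME@ is
+// substituted when this .in template is configured at build time.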
+ +#include "ultrainfer/pybind/main.h" + + + +namespace ultrainfer { + +void BindFDTensor(pybind11::module&); +void BindRuntime(pybind11::module&); +void BindFDModel(pybind11::module&); +void BindVision(pybind11::module&); +void BindText(pybind11::module&); +void BindPipeline(pybind11::module&); + +pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) { + pybind11::dtype dt; + if (fd_dtype == FDDataType::INT32) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::INT64) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::FP32) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::FP64) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::UINT8) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::INT8) { + dt = pybind11::dtype::of(); + } else if (fd_dtype == FDDataType::FP16) { + dt = pybind11::dtype::of(); + } else { + FDASSERT(false, "The function doesn't support data type of %s.", + Str(fd_dtype).c_str()); + } + return dt; +} + +FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype) { + if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::INT32; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::INT64; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::FP32; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::FP64; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::UINT8; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::INT8; + } else if (np_dtype.is(pybind11::dtype::of())) { + return FDDataType::FP16; + } + FDASSERT(false, + "NumpyDataTypeToFDDataType() only support " + "int8/int32/int64/float32/float64/float16 now."); + return FDDataType::FP32; +} + +void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor, + bool share_buffer) { + auto dtype = NumpyDataTypeToFDDataType(pyarray.dtype()); + std::vector data_shape; + data_shape.insert(data_shape.begin(), pyarray.shape(), + pyarray.shape() + pyarray.ndim()); + if (share_buffer) { + tensor->SetExternalData(data_shape, dtype, + pyarray.mutable_data()); + } else { + tensor->Resize(data_shape, dtype); + memcpy(tensor->MutableData(), pyarray.mutable_data(), pyarray.nbytes()); + } +} + +void PyArrayToTensorList(std::vector& pyarrays, std::vector* tensors, + bool share_buffer) { + tensors->resize(pyarrays.size()); + for(auto i = 0; i < pyarrays.size(); ++i) { + PyArrayToTensor(pyarrays[i], &(*tensors)[i], share_buffer); + } +} + +pybind11::array TensorToPyArray(const FDTensor& tensor) { + auto numpy_dtype = FDDataTypeToNumpyDataType(tensor.dtype); + auto out = pybind11::array(numpy_dtype, tensor.shape); + memcpy(out.mutable_data(), tensor.CpuData(), tensor.Nbytes()); + return out; +} + +#ifdef ENABLE_VISION +int NumpyDataTypeToOpenCvType(const pybind11::dtype& np_dtype) { + if (np_dtype.is(pybind11::dtype::of())) { + return CV_32S; + } else if (np_dtype.is(pybind11::dtype::of())) { + return CV_8S; + } else if (np_dtype.is(pybind11::dtype::of())) { + return CV_8U; + } else if (np_dtype.is(pybind11::dtype::of())) { + return CV_32F; + } else { + FDASSERT( + false, + "NumpyDataTypeToOpenCvType() only support int32/int8/uint8/float32 " + "now."); + } + return CV_8U; +} + +int NumpyDataTypeToOpenCvTypeV2(pybind11::array& pyarray) { + if (pybind11::isinstance>(pyarray)) { + return CV_32S; + } else if (pybind11::isinstance>(pyarray)) { + return CV_8S; + } else if (pybind11::isinstance>(pyarray)) { + return CV_8U; + } else 
if (pybind11::isinstance>(pyarray)) {
+    return CV_32F;
+  } else {
+    FDASSERT(
+        false,
+        "NumpyDataTypeToOpenCvTypeV2() only supports int32/int8/uint8/float32 "
+        "now.");
+  }
+  return CV_8U;
+}
+
+cv::Mat PyArrayToCvMat(pybind11::array& pyarray) {
+  // auto cv_type = NumpyDataTypeToOpenCvType(pyarray.dtype());
+  auto cv_type = NumpyDataTypeToOpenCvTypeV2(pyarray);
+  FDASSERT(
+      pyarray.ndim() == 3,
+      "Require rank of array to be 3 with HWC format while converting it to "
+      "cv::Mat.");
+  int channel = *(pyarray.shape() + 2);
+  int height = *(pyarray.shape());
+  int width = *(pyarray.shape() + 1);
+  return cv::Mat(height, width, CV_MAKETYPE(cv_type, channel),
+                 pyarray.mutable_data());
+}
+#endif
+
+PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) {
+  m.doc() =
+      "Make it easier for programmers to deploy deep learning models, and "
+      "save time to save the world!";
+
+  m.def("set_logger", &SetLogger);
+
+  BindFDTensor(m);
+  BindRuntime(m);
+  BindFDModel(m);
+#ifdef ENABLE_VISION
+  auto vision_module =
+      m.def_submodule("vision", "Vision module of UltraInfer.");
+  BindVision(vision_module);
+  auto pipeline_module =
+      m.def_submodule("pipeline", "Pipeline module of UltraInfer.");
+  BindPipeline(pipeline_module);
+#endif
+
+#ifdef ENABLE_TEXT
+  auto text_module =
+      m.def_submodule("text", "Text module of UltraInfer.");
+  BindText(text_module);
+#endif
+}
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/pybind/main.h b/libs/ultrainfer/ultrainfer/pybind/main.h
new file mode 100755
index 0000000000..ed99bf02a7
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/pybind/main.h
@@ -0,0 +1,135 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
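+
+// Shared declarations for the Python bindings: dtype conversion between numpy
+// and FDTensor, cv::Mat conversion, and float16 registration with pybind11.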
+ +#pragma once + +#include +#include +#include +#include + +#include + +#include "ultrainfer/runtime/runtime.h" + +#ifdef ENABLE_VISION +#include "ultrainfer/pipeline.h" +#include "ultrainfer/vision.h" +#endif + +#ifdef ENABLE_TEXT +#include "ultrainfer/text.h" +#endif + +#include "ultrainfer/core/float16.h" + +namespace ultrainfer { + +void BindBackend(pybind11::module &); +void BindVision(pybind11::module &); +void BindText(pybind11::module &m); +void BindPipeline(pybind11::module &m); +void BindRKNPU2Config(pybind11::module &); + +pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType &fd_dtype); + +FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype &np_dtype); + +void PyArrayToTensor(pybind11::array &pyarray, FDTensor *tensor, + bool share_buffer = false); +void PyArrayToTensorList(std::vector &pyarray, + std::vector *tensor, + bool share_buffer = false); +pybind11::array TensorToPyArray(const FDTensor &tensor); + +#ifdef ENABLE_VISION +cv::Mat PyArrayToCvMat(pybind11::array &pyarray); +#endif + +template FDDataType CTypeToFDDataType() { + if (std::is_same::value) { + return FDDataType::INT32; + } else if (std::is_same::value) { + return FDDataType::INT64; + } else if (std::is_same::value) { + return FDDataType::FP32; + } else if (std::is_same::value) { + return FDDataType::FP64; + } else if (std::is_same::value) { + return FDDataType::INT8; + } + FDASSERT(false, "CTypeToFDDataType only support " + "int8/int32/int64/float32/float64 now."); + return FDDataType::FP32; +} + +template +std::vector +PyBackendInfer(T &self, const std::vector &names, + std::vector &data) { + std::vector inputs(data.size()); + for (size_t i = 0; i < data.size(); ++i) { + // TODO(jiangjiajun) here is considered to use user memory directly + auto dtype = NumpyDataTypeToFDDataType(data[i].dtype()); + std::vector data_shape; + data_shape.insert(data_shape.begin(), data[i].shape(), + data[i].shape() + data[i].ndim()); + inputs[i].Resize(data_shape, dtype); + memcpy(inputs[i].MutableData(), data[i].mutable_data(), data[i].nbytes()); + inputs[i].name = names[i]; + } + + std::vector outputs(self.NumOutputs()); + self.Infer(inputs, &outputs); + + std::vector results; + results.reserve(outputs.size()); + for (size_t i = 0; i < outputs.size(); ++i) { + auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype); + results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape)); + memcpy(results[i].mutable_data(), outputs[i].Data(), + outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype)); + } + return results; +} + +} // namespace ultrainfer + +namespace pybind11 { +namespace detail { + +// Note: use same enum number of float16 in numpy. +// import numpy as np +// print np.dtype(np.float16).num # 23 +constexpr int NPY_FLOAT16_ = 23; + +// Note: Since float16 is not a builtin type in C++, we register +// ultrainfer::float16 as numpy.float16. +// Ref: https://github.com/pybind/pybind11/issues/1776 +template <> struct npy_format_descriptor { + static pybind11::dtype dtype() { + handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16_); + return reinterpret_borrow(ptr); + } + static std::string format() { + // Note: "e" represents float16. + // Details at: + // https://docs.python.org/3/library/struct.html#format-characters. 
+ return "e"; + } + static constexpr auto name = _("float16"); +}; + +} // namespace detail +} // namespace pybind11 diff --git a/libs/ultrainfer/ultrainfer/pybind/runtime.cc b/libs/ultrainfer/ultrainfer/pybind/runtime.cc new file mode 100755 index 0000000000..43797f58da --- /dev/null +++ b/libs/ultrainfer/ultrainfer/pybind/runtime.cc @@ -0,0 +1,172 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindOption(pybind11::module &m); + +void BindRuntime(pybind11::module &m) { + BindOption(m); + + pybind11::class_(m, "TensorInfo") + .def_readwrite("name", &TensorInfo::name) + .def_readwrite("shape", &TensorInfo::shape) + .def_readwrite("dtype", &TensorInfo::dtype); + + pybind11::class_(m, "Runtime") + .def(pybind11::init()) + .def("init", &Runtime::Init) + .def("compile", + [](Runtime &self, + std::vector> &warm_datas, + const RuntimeOption &_option) { + size_t rows = warm_datas.size(); + size_t columns = warm_datas[0].size(); + std::vector> warm_tensors( + rows, std::vector(columns)); + for (size_t i = 0; i < rows; ++i) { + for (size_t j = 0; j < columns; ++j) { + auto dtype = + NumpyDataTypeToFDDataType(warm_datas[i][j].dtype()); + std::vector data_shape; + data_shape.insert(data_shape.begin(), warm_datas[i][j].shape(), + warm_datas[i][j].shape() + + warm_datas[i][j].ndim()); + warm_tensors[i][j].Resize(data_shape, dtype); + memcpy(warm_tensors[i][j].MutableData(), + warm_datas[i][j].mutable_data(), + warm_datas[i][j].nbytes()); + } + } + return self.Compile(warm_tensors); + }) + .def("infer", + [](Runtime &self, std::map &data) { + std::vector inputs(data.size()); + int index = 0; + for (auto iter = data.begin(); iter != data.end(); ++iter) { + std::vector data_shape; + data_shape.insert(data_shape.begin(), iter->second.shape(), + iter->second.shape() + iter->second.ndim()); + auto dtype = NumpyDataTypeToFDDataType(iter->second.dtype()); + // TODO(jiangjiajun) Maybe skip memory copy is a better choice + // use SetExternalData + inputs[index].Resize(data_shape, dtype); + memcpy(inputs[index].MutableData(), iter->second.mutable_data(), + iter->second.nbytes()); + inputs[index].name = iter->first; + index += 1; + } + + std::vector outputs(self.NumOutputs()); + self.Infer(inputs, &outputs); + + std::vector results; + results.reserve(outputs.size()); + for (size_t i = 0; i < outputs.size(); ++i) { + auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype); + results.emplace_back( + pybind11::array(numpy_dtype, outputs[i].shape)); + memcpy(results[i].mutable_data(), outputs[i].Data(), + outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype)); + } + return results; + }) + .def("infer", + [](Runtime &self, std::map &data) { + std::vector inputs; + inputs.reserve(data.size()); + for (auto iter = data.begin(); iter != data.end(); ++iter) { + FDTensor tensor; + tensor.SetExternalData(iter->second.Shape(), + iter->second.Dtype(), iter->second.Data(), + 
iter->second.device); + tensor.name = iter->first; + inputs.push_back(tensor); + } + std::vector outputs; + if (!self.Infer(inputs, &outputs)) { + throw std::runtime_error("Failed to inference with Runtime."); + } + return outputs; + }) + .def("infer", + [](Runtime &self, std::vector &inputs) { + std::vector outputs; + self.Infer(inputs, &outputs); + return outputs; + }) + .def("bind_input_tensor", &Runtime::BindInputTensor) + .def("bind_output_tensor", &Runtime::BindOutputTensor) + .def("infer", [](Runtime &self) { self.Infer(); }) + .def("get_output_tensor", + [](Runtime &self, const std::string &name) { + FDTensor *output = self.GetOutputTensor(name); + if (output == nullptr) { + return pybind11::cast(nullptr); + } + return pybind11::cast(*output); + }) + .def("num_inputs", &Runtime::NumInputs) + .def("num_outputs", &Runtime::NumOutputs) + .def("get_input_info", &Runtime::GetInputInfo) + .def("get_output_info", &Runtime::GetOutputInfo) + .def("get_profile_time", &Runtime::GetProfileTime) + .def_readonly("option", &Runtime::option); + + pybind11::enum_(m, "Backend", pybind11::arithmetic(), + "Backend for inference.") + .value("UNKOWN", Backend::UNKNOWN) + .value("ORT", Backend::ORT) + .value("TRT", Backend::TRT) + .value("POROS", Backend::POROS) + .value("PDINFER", Backend::PDINFER) + .value("RKNPU2", Backend::RKNPU2) + .value("SOPHGOTPU", Backend::SOPHGOTPU) + .value("TVM", Backend::TVM) + .value("LITE", Backend::LITE); + pybind11::enum_(m, "ModelFormat", pybind11::arithmetic(), + "ModelFormat for inference.") + .value("PADDLE", ModelFormat::PADDLE) + .value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT) + .value("RKNN", ModelFormat::RKNN) + .value("SOPHGO", ModelFormat::SOPHGO) + .value("ONNX", ModelFormat::ONNX) + .value("TVMFormat", ModelFormat::TVMFormat); + pybind11::enum_(m, "Device", pybind11::arithmetic(), + "Device for inference.") + .value("CPU", Device::CPU) + .value("GPU", Device::GPU) + .value("IPU", Device::IPU) + .value("RKNPU", Device::RKNPU) + .value("SOPHGOTPU", Device::SOPHGOTPUD); + + pybind11::enum_(m, "FDDataType", pybind11::arithmetic(), + "Data type of UltraInfer.") + .value("BOOL", FDDataType::BOOL) + .value("INT8", FDDataType::INT8) + .value("INT16", FDDataType::INT16) + .value("INT32", FDDataType::INT32) + .value("INT64", FDDataType::INT64) + .value("FP16", FDDataType::FP16) + .value("FP32", FDDataType::FP32) + .value("FP64", FDDataType::FP64) + .value("UINT8", FDDataType::UINT8); + + m.def("get_available_backends", []() { return GetAvailableBackends(); }); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime.h b/libs/ultrainfer/ultrainfer/runtime.h new file mode 100755 index 0000000000..ccd529dd34 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime.h @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file runtime.h + \brief A brief file description. 
+ + More details + */ + +#pragma once +#include "ultrainfer/core/config.h" +#include "ultrainfer/runtime/runtime.h" diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/backend.h new file mode 100755 index 0000000000..ec9b720381 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/backend.h @@ -0,0 +1,158 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +#include "ultrainfer/benchmark/benchmark.h" +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/runtime/runtime_option.h" + +namespace ultrainfer { + +/*! @brief Information of Tensor + */ +struct TensorInfo { + std::string name; ///< Name of tensor + std::vector shape; ///< Shape of tensor + FDDataType dtype; ///< Data type of tensor + + friend std::ostream &operator<<(std::ostream &output, + const TensorInfo &info) { + output << "TensorInfo(name: " << info.name << ", shape: ["; + for (size_t i = 0; i < info.shape.size(); ++i) { + if (i == info.shape.size() - 1) { + output << info.shape[i]; + } else { + output << info.shape[i] << ", "; + } + } + output << "], dtype: " << Str(info.dtype) << ")"; + return output; + } +}; + +class BaseBackend { +public: + bool initialized_ = false; + + BaseBackend() {} + virtual ~BaseBackend() = default; + + virtual bool Initialized() const { return initialized_; } + + virtual bool Init(const RuntimeOption &option) { + FDERROR << "Not Implement for " << option.backend << " in " << option.device + << "." << std::endl; + return false; + } + + // Get number of inputs of the model + virtual int NumInputs() const = 0; + // Get number of outputs of the model + virtual int NumOutputs() const = 0; + // Get information of input tensor + virtual TensorInfo GetInputInfo(int index) = 0; + // Get information of output tensor + virtual TensorInfo GetOutputInfo(int index) = 0; + // Get information of all the input tensors + virtual std::vector GetInputInfos() = 0; + // Get information of all the output tensors + virtual std::vector GetOutputInfos() = 0; + + // if copy_to_fd is true, copy memory data to FDTensor + // else share memory to FDTensor(only Paddle、ORT、TRT、OpenVINO support it) + virtual bool Infer(std::vector &inputs, + std::vector *outputs, + bool copy_to_fd = true) = 0; + // Optional: For those backends which can share memory + // while creating multiple inference engines with same model file + virtual std::unique_ptr Clone(RuntimeOption &runtime_option, + void *stream = nullptr, + int device_id = -1) { + FDERROR << "Clone no support " << runtime_option.backend << " " << stream + << " " << device_id << std::endl; + return nullptr; + } + + benchmark::BenchmarkOption benchmark_option_; + benchmark::BenchmarkResult benchmark_result_; +}; + +/** \brief Macros for Runtime benchmark profiling. 
+ * The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN' + * indicates that the least number of times the loop + * will repeat when profiling mode is not enabled. + * In most cases, the value should be 1, i.e., results are + * obtained by running the inference process once, when + * the profile mode is turned off, such as ONNX Runtime, + * OpenVINO, TensorRT, Paddle Inference, Paddle Lite, + * RKNPU2, SOPHGO etc. + * + * example code @code + * // OpenVINOBackend::Infer + * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + * // do something .... + * RUNTIME_PROFILE_LOOP_BEGIN(1) + * // The codes which wrapped by 'BEGIN(1) ~ END' scope + * // will only run once when profiling mode is not enabled. + * request_.infer(); + * RUNTIME_PROFILE_LOOP_END + * // do something .... + * RUNTIME_PROFILE_LOOP_H2D_D2H_END + * + * @endcode In this case, No global variables inside a function + * are wrapped by BEGIN and END, which may be required for + * subsequent tasks. But, some times we need to set 'base_loop' + * as 0, such as POROS. + * + * * example code @code + * // PorosBackend::Infer + * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + * // do something .... + * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' as 0 + * // The codes which wrapped by 'BEGIN(0) ~ END' scope + * // will not run when profiling mode is not enabled. + * auto poros_outputs = _poros_module->forward(poros_inputs); + * RUNTIME_PROFILE_LOOP_END + * // Run another inference beyond the scope of 'BEGIN ~ END' + * // to get valid outputs for subsequent tasks. + * auto poros_outputs = _poros_module->forward(poros_inputs); + * // do something .... will use 'poros_outputs' ... + * if (poros_outputs.isTensor()) { + * // ... + * } + * RUNTIME_PROFILE_LOOP_H2D_D2H_END + * + * @endcode In this case, 'poros_outputs' inside a function + * are wrapped by BEGIN and END, which may be required for + * subsequent tasks. So, we set 'base_loop' as 0 and lanuch + * another infer to get the valid outputs beyond the scope + * of 'BEGIN ~ END' for subsequent tasks. + */ + +#define RUNTIME_PROFILE_LOOP_BEGIN(base_loop) \ + __RUNTIME_PROFILE_LOOP_BEGIN(benchmark_option_, (base_loop)) +#define RUNTIME_PROFILE_LOOP_END __RUNTIME_PROFILE_LOOP_END(benchmark_result_) +#define RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN \ + __RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN(benchmark_option_, 1) +#define RUNTIME_PROFILE_LOOP_H2D_D2H_END \ + __RUNTIME_PROFILE_LOOP_H2D_D2H_END(benchmark_result_) + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.cu b/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.cu new file mode 100755 index 0000000000..c60d274fb0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.cu @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
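+
+// CUDA kernel implementing adaptive 2D average/max pooling on NCHW tensors,
+// shared by GPU backends.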
+ +#ifdef WITH_GPU + +#include "adaptive_pool2d_kernel.h" + +namespace ultrainfer { +template +__global__ void CudaCastKernel(const T1 *in, T2 *out, int edge, + int out_bc_offset, int in_bc_offset, int ih, + int iw, int oh, int ow, bool is_avg) { + int position = blockDim.x * blockIdx.x + threadIdx.x; + if (position >= edge) { + return; + } + int offset = floorf(float(position) / out_bc_offset); + int h = floorf(float(position % out_bc_offset) / ow); + int w = (position % out_bc_offset) % ow; + int hstart = floorf(static_cast(h * ih) / oh); + int hend = ceilf(static_cast((h + 1) * ih) / oh); + int wstart = floorf(static_cast(w * iw) / ow); + int wend = ceilf(static_cast((w + 1) * iw) / ow); + float ele_val = 0.0; + if (is_avg) { + ele_val = 0.0; + } else { + ele_val = + static_cast(in[offset * in_bc_offset + hstart * iw + wstart]); + } + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + int input_idx = h * iw + w; + if (is_avg) { + ele_val = + ele_val + static_cast(in[offset * in_bc_offset + input_idx]); + } else { + ele_val = + (ele_val > + static_cast(in[offset * in_bc_offset + input_idx])) + ? ele_val + : static_cast(in[offset * in_bc_offset + input_idx]); + } + } + } + out[position] = static_cast( + ele_val / static_cast(((hend - hstart) * (wend - wstart)))); +} + +void CudaAdaptivePool(const std::vector &input_dims, + const std::vector &output_dims, void *output, + const void *input, void *compute_stream, + const std::string &pooling_type, const std::string &dtype, + const std::string &out_dtype) { + auto casted_compute_stream = reinterpret_cast(compute_stream); + int out_bc_offset = output_dims[2] * output_dims[3]; + int in_bc_offset = input_dims[2] * input_dims[3]; + int jobs = 1; + for (int i : output_dims) { + jobs *= i; + } + bool is_avg = pooling_type == "avg"; + int threads = 256; + int blocks = ceil(jobs / static_cast(threads)); + if (dtype == "float") { + CudaCastKernel<<>>( + static_cast(input), static_cast(output), jobs, + out_bc_offset, in_bc_offset, int(input_dims[2]), int(input_dims[3]), + int(output_dims[2]), int(output_dims[3]), is_avg); + } else if (dtype == "half") { + if (out_dtype == "half") { + CudaCastKernel<<>>( + static_cast(input), static_cast(output), jobs, + out_bc_offset, in_bc_offset, int(input_dims[2]), int(input_dims[3]), + int(output_dims[2]), int(output_dims[3]), is_avg); + } + if (out_dtype == "float") { + CudaCastKernel + <<>>( + static_cast(input), static_cast(output), + jobs, out_bc_offset, in_bc_offset, int(input_dims[2]), + int(input_dims[3]), int(output_dims[2]), int(output_dims[3]), + is_avg); + } + } +} +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.h b/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.h new file mode 100755 index 0000000000..10899e96b3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.h @@ -0,0 +1,35 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace ultrainfer { + +void CudaAdaptivePool(const std::vector &input_dims, + const std::vector &output_dims, void *output, + const void *input, void *compute_stream, + const std::string &pooling_type, + const std::string &dtype = "float", + const std::string &out_dtype = "float"); + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.cc new file mode 100755 index 0000000000..902014b165 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.cc @@ -0,0 +1,399 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/runtime/backends/horizon/horizon_backend.h" +namespace ultrainfer { + +HorizonBackend::~HorizonBackend() { + int ret = -1; + // Release memory uniformly here + if (input_properties_ != nullptr) { + free(input_properties_); + } + if (output_properties_ != nullptr) { + free(output_properties_); + } + if (input_mems_ == nullptr) { + return; + } + for (int i = 0; i < NumInputs(); i++) { + + ret = hbSysFreeMem(&(input_mems_[i].sysMem[0])); + + if (ret != 0) { + FDERROR << "release input mem fail! ret=" << ret << std::endl; + } + if (input_mems_ != nullptr) { + free(input_mems_); + } + } + + for (int i = 0; i < NumOutputs(); i++) { + ret = hbSysFreeMem(&(output_mems_[i].sysMem[0])); + + if (ret != 0) { + FDERROR << "release output mem fail! ret=" << ret << std::endl; + } + if (output_mems_ != nullptr) { + free(output_mems_); + } + } + ret = hbDNNRelease(packed_dnn_handle_); + if (ret != 0) { + FDERROR << "hbDNNRelease fail! ret=" << ret << std::endl; + } +} + +bool HorizonBackend::GetModelInputOutputInfos() { + const char **model_name_list; + int model_count = 0; + int ret; + // get model name + ret = + hbDNNGetModelNameList(&model_name_list, &model_count, packed_dnn_handle_); + if (ret != 0) { + FDERROR << "get model name fail! ret=" << ret << std::endl; + return false; + } + // get dnn handle + ret = + hbDNNGetModelHandle(&dnn_handle_, packed_dnn_handle_, model_name_list[0]); + if (ret != 0) { + FDERROR << "get dnn handle fail! ret=" << ret << std::endl; + return false; + } + // get input infos + // Get detailed input parameters + int input_count = 0; + ret = hbDNNGetInputCount(&input_count, dnn_handle_); + if (ret != 0) { + FDERROR << "get input count fail! 
ret=" << ret << std::endl; + return false; + } + input_properties_ = (hbDNNTensorProperties *)malloc( + sizeof(hbDNNTensorProperties) * input_count); + memset(input_properties_, 0, input_count * sizeof(hbDNNTensorProperties)); + + inputs_desc_.resize(input_count); + + // get input info and copy to input tensor info + for (uint32_t i = 0; i < input_count; i++) { + ret = hbDNNGetInputTensorProperties(&input_properties_[i], dnn_handle_, i); + + if (ret != 0) { + FDERROR << "get input tensor properties fail! ret=" << ret << std::endl; + return false; + } + + if ((input_properties_[i].tensorLayout != HB_DNN_LAYOUT_NHWC)) { + FDERROR << "horizon_backend only support input layout is NHWC" + << std::endl; + } + if (input_properties_[i].tensorType != HB_DNN_IMG_TYPE_RGB) { + FDERROR << "horizon_backend only support input format is RGB" + << std::endl; + } + + const char *name; + + ret = hbDNNGetInputName(&name, dnn_handle_, i); + if (ret != 0) { + FDERROR << "get input tensor name fail! ret=" << ret << std::endl; + return false; + } + // copy input proper to input tensor info + std::string temp_name = name; + std::vector temp_shape{}; + int n_dims = input_properties_[i].validShape.numDimensions; + + temp_shape.resize(n_dims); + for (int j = 0; j < n_dims; j++) { + temp_shape[j] = (int)input_properties_[i].validShape.dimensionSize[j]; + } + + // Only support RGB format, so input type is UINT8 + FDDataType temp_dtype = FDDataType::UINT8; + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + inputs_desc_[i] = temp_input_info; + } + + // get output infos + // Get detailed output parameters + int output_count = 0; + ret = hbDNNGetOutputCount(&output_count, dnn_handle_); + if (ret != 0) { + FDERROR << "get output count fail! ret=" << ret << std::endl; + return false; + } + output_properties_ = (hbDNNTensorProperties *)malloc( + sizeof(hbDNNTensorProperties) * output_count); + memset(output_properties_, 0, output_count * sizeof(hbDNNTensorProperties)); + + outputs_desc_.resize(output_count); + + for (uint32_t i = 0; i < output_count; i++) { + // get model output size + ret = + hbDNNGetOutputTensorProperties(&output_properties_[i], dnn_handle_, i); + + const char *name; + ret = hbDNNGetOutputName(&name, dnn_handle_, i); + if (ret != 0) { + FDERROR << "get output tensor name fail! 
ret=" << ret << std::endl; + return false; + } + + // copy output proper to output tensor info + std::string temp_name = name; + std::vector temp_shape{}; + int n_dims = output_properties_[i].validShape.numDimensions; + + if ((n_dims == 4) && + (output_properties_[i].validShape.dimensionSize[3] == 1)) { + n_dims--; + } + temp_shape.resize(n_dims); + for (int j = 0; j < n_dims; j++) { + temp_shape[j] = (int)output_properties_[i].validShape.dimensionSize[j]; + } + + FDDataType temp_dtype = + HorizonTensorTypeToFDDataType(output_properties_[i].tensorType); + + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + outputs_desc_[i] = temp_input_info; + } + + return true; +} + +TensorInfo HorizonBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()); + return inputs_desc_[index]; +} + +std::vector HorizonBackend::GetInputInfos() { return inputs_desc_; } + +TensorInfo HorizonBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs %d.", index, + NumOutputs()); + + return outputs_desc_[index]; +} + +std::vector HorizonBackend::GetOutputInfos() { + return outputs_desc_; +} + +bool HorizonBackend::LoadModel(const char *model) { + int ret = -1; + ret = hbDNNInitializeFromFiles(&packed_dnn_handle_, &model, 1); + if (ret != 0) { + FDERROR << "horizon_init fail! ret=" << ret << std::endl; + return false; + } + return true; +} +bool HorizonBackend::Init(const RuntimeOption &runtime_option) { + // Init model from file + if (!LoadModel((char *)runtime_option.model_file.data())) { + FDERROR << "load model failed" << std::endl; + return false; + } + + // GetModelInputOutputInfos + if (!GetModelInputOutputInfos()) { + FDERROR << "get model input output infos failed" << std::endl; + return false; + } + + return true; +} + +bool HorizonBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + + // Judge whether the input and output size are the same + if (inputs.size() != inputs_desc_.size()) { + FDERROR << "[HorizonBackend] Size of the inputs(" << inputs.size() + << ") should keep same with the inputs of this model(" + << inputs_desc_.size() << ")." << std::endl; + return false; + } + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + int ret = -1; + if (!infer_init_) { + // Create input tensor memory + int input_count = NumInputs(); + int output_count = NumOutputs(); + + input_mems_ = (hbDNNTensor *)malloc(sizeof(hbDNNTensor) * input_count); + output_mems_ = (hbDNNTensor *)malloc(sizeof(hbDNNTensor) * output_count); + + for (uint32_t i = 0; i < input_count; i++) { + input_mems_[i].properties = input_properties_[i]; + + input_mems_[i].properties.alignedShape = + input_mems_[i].properties.validShape; + + auto current_shape = GetInputInfo(i).shape; + auto &mem = input_mems_[i].sysMem[0]; + int intput_memSize = input_properties_[i].alignedByteSize; + + ret = hbSysAllocCachedMem(&mem, intput_memSize); + + if (ret != 0) { + FDERROR << "hbSysAllocCachedMem fails." << std::endl; + return false; + } + } + + for (uint32_t i = 0; i < output_count; i++) { + + output_mems_[i].properties = output_properties_[i]; + + auto current_shape = GetOutputInfo(i).shape; + auto &mem = output_mems_[i].sysMem[0]; + int output_memSize = output_properties_[i].alignedByteSize; + + ret = hbSysAllocCachedMem(&mem, output_memSize); + if (ret != 0) { + FDERROR << "hbSysAllocCachedMem fails." 
<< std::endl; + return false; + } + } + infer_init_ = true; + } + // Copy input data to input tensor memory + for (uint32_t i = 0; i < NumInputs(); i++) { + if (inputs[i].Data() == nullptr) { + FDERROR << "inputs[i].Data is NULL." << std::endl; + return false; + } + auto &mem = input_mems_[i].sysMem[0]; + + memcpy(mem.virAddr, inputs[i].Data(), inputs[i].Nbytes()); + ret = hbSysFlushMem(&mem, HB_SYS_MEM_CACHE_CLEAN); + if (ret != 0) { + FDERROR << "hbSysFlushMem fails." << std::endl; + return false; + } + } + + hbDNNTaskHandle_t task_handle = nullptr; + hbDNNInferCtrlParam infer_ctrl_param; + HB_DNN_INITIALIZE_INFER_CTRL_PARAM(&infer_ctrl_param); + + RUNTIME_PROFILE_LOOP_BEGIN(1) + ret = hbDNNInfer(&task_handle, &output_mems_, input_mems_, dnn_handle_, + &infer_ctrl_param); + RUNTIME_PROFILE_LOOP_END + if (ret != 0) { + FDERROR << "hbDNNInference fails." << std::endl; + return false; + } + ret = hbDNNWaitTaskDone(task_handle, 0); + if (ret != 0) { + FDERROR << "hbDNNWaitTaskDone fails." << std::endl; + return false; + } + ret = hbDNNReleaseTask(task_handle); + if (ret != 0) { + FDERROR << "hbDNNReleaseTask fails." << std::endl; + return false; + } + // get result + outputs->resize(outputs_desc_.size()); + std::vector temp_shape(4); + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + temp_shape.resize(outputs_desc_[i].shape.size()); + for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) { + temp_shape[j] = outputs_desc_[i].shape[j]; + } + (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype, + outputs_desc_[i].name); + + hbSysFlushMem(&(output_mems_[i].sysMem[0]), HB_SYS_MEM_CACHE_INVALIDATE); + auto data = (float *)(output_mems_[i].sysMem[0].virAddr); + + auto shift = output_mems_[i].properties.shift.shiftData; + auto scale = output_mems_[i].properties.scale.scaleData; + + for (int j = 0; j < (*outputs)[i].Nbytes(); j++) { + if (output_mems_[i].properties.quantiType == SHIFT) { + data[j] = data[j] / (1 << shift[j]); + } else if (output_mems_[i].properties.quantiType == SCALE) { + data[j] = data[j] * scale[j]; + } + } + + memcpy((*outputs)[i].MutableData(), + (float *)output_mems_[i].sysMem[0].virAddr, (*outputs)[i].Nbytes()); + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + return true; +} + +FDDataType HorizonBackend::HorizonTensorTypeToFDDataType(int32_t type) { + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_F16) { + return FDDataType::FP16; + } + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_F32) { + return FDDataType::FP32; + } + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_S8) { + return FDDataType::INT8; + } + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_S16) { + return FDDataType::INT16; + } + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_S32) { + return FDDataType::INT32; + } + if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_U8) { + return FDDataType::UINT8; + } + + FDERROR << "FDDataType don't support this type" << std::endl; + return FDDataType::UNKNOWN1; +} + +hbDNNDataType HorizonBackend::FDDataTypeToHorizonTensorType(FDDataType type) { + if (type == FDDataType::FP16) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_F16; + } + if (type == FDDataType::FP32) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_F32; + } + if (type == FDDataType::INT8) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_S8; + } + if (type == FDDataType::INT16) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_S16; + } + if (type == FDDataType::INT32) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_S32; + } + if (type == FDDataType::UINT8) { + return hbDNNDataType::HB_DNN_TENSOR_TYPE_U8; + } + FDERROR << 
"horizon_tensor_type don't support this type" << std::endl; + + return hbDNNDataType::HB_DNN_TENSOR_TYPE_MAX; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.h new file mode 100755 index 0000000000..1073b2e2d0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/horizon/horizon_backend.h @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include + +#include "dnn/hb_dnn.h" +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/runtime/backends/backend.h" + +namespace ultrainfer { +class HorizonBackend : public BaseBackend { +public: + HorizonBackend() = default; + ~HorizonBackend(); + + // Horizon Backend implementation. + bool Init(const RuntimeOption &runtime_option); + + int NumInputs() const override { + return static_cast(inputs_desc_.size()); + } + + int NumOutputs() const override { + return static_cast(outputs_desc_.size()); + } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + +private: + hbPackedDNNHandle_t packed_dnn_handle_; + hbDNNHandle_t dnn_handle_; + hbDNNTensorProperties *input_properties_ = nullptr; + hbDNNTensorProperties *output_properties_ = nullptr; + hbDNNTensor *input_mems_; + hbDNNTensor *output_mems_; + + bool infer_init_ = false; + std::vector inputs_desc_; + std::vector outputs_desc_; + bool GetModelInputOutputInfos(); + bool LoadModel(const char *model); + + static FDDataType HorizonTensorTypeToFDDataType(int32_t type); + static hbDNNDataType FDDataTypeToHorizonTensorType(FDDataType type); +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/lite/configure_hardware.cc b/libs/ultrainfer/ultrainfer/runtime/backends/lite/configure_hardware.cc new file mode 100755 index 0000000000..183e65de4a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/lite/configure_hardware.cc @@ -0,0 +1,171 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/runtime/backends/lite/lite_backend.h" + +#include + +namespace ultrainfer { + +#if defined(__arm__) || defined(__aarch64__) +#define FD_LITE_HOST TARGET(kARM) +#elif defined(__x86_64__) +#define FD_LITE_HOST TARGET(kX86) +#endif + +std::vector +GetPlacesForCpu(const LiteBackendOption &option) { + std::vector valid_places; + valid_places.push_back( + paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kInt8)}); + if (option.enable_fp16) { + paddle::lite_api::MobileConfig check_fp16_config; + if (check_fp16_config.check_fp16_valid()) { + valid_places.push_back( + paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFP16)}); + } else { + FDWARNING << "Current CPU doesn't support float16 precision, will " + "fallback to float32." + << std::endl; + } + } + valid_places.push_back( + paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFloat)}); + return valid_places; +} + +void LiteBackend::ConfigureCpu(const LiteBackendOption &option) { + config_.set_valid_places(GetPlacesForCpu(option)); +} + +void LiteBackend::ConfigureGpu(const LiteBackendOption &option) { + std::vector valid_places; + if (option.enable_fp16) { + valid_places.emplace_back(paddle::lite_api::Place{ + TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault)}); + valid_places.emplace_back(paddle::lite_api::Place{ + TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageFolder)}); + } + valid_places.emplace_back( + paddle::lite_api::Place{TARGET(kOpenCL), PRECISION(kFloat)}); + valid_places.emplace_back(paddle::lite_api::Place{ + TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageDefault)}); + valid_places.emplace_back(paddle::lite_api::Place{ + TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageFolder)}); + valid_places.emplace_back( + paddle::lite_api::Place{TARGET(kOpenCL), PRECISION(kAny)}); + valid_places.emplace_back( + paddle::lite_api::Place{TARGET(kOpenCL), PRECISION(kInt32)}); + valid_places.emplace_back( + paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)}); + valid_places.emplace_back( + paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)}); + config_.set_valid_places(valid_places); +} + +void LiteBackend::ConfigureKunlunXin(const LiteBackendOption &option) { + std::vector valid_places; + // TODO(yeliang): Placing kInt8 first may cause accuracy issues of some model + // valid_places.push_back( + // paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)}); + if (option.enable_fp16) { + valid_places.push_back( + paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)}); + } + valid_places.push_back( + paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)}); + + config_.set_xpu_dev_per_thread(option.device_id); + config_.set_xpu_workspace_l3_size_per_thread( + option.kunlunxin_l3_workspace_size); + config_.set_xpu_l3_cache_method(option.kunlunxin_l3_workspace_size, + option.kunlunxin_locked); + config_.set_xpu_l3_cache_autotune(option.kunlunxin_autotune); + config_.set_xpu_conv_autotune(option.kunlunxin_autotune, + option.kunlunxin_autotune_file); + config_.set_xpu_multi_encoder_method(option.kunlunxin_precision, + option.kunlunxin_adaptive_seqlen); + config_.set_xpu_gm_workspace_method(option.kunlunxin_gm_default_size); + if (option.kunlunxin_enable_multi_stream) { + config_.enable_xpu_multi_stream(); + } + auto cpu_places = GetPlacesForCpu(option); + valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end()); + config_.set_valid_places(valid_places); +} + +void LiteBackend::ConfigureTimvx(const LiteBackendOption &option) { + 
config_.set_nnadapter_device_names({"verisilicon_timvx"}); + std::vector valid_places; + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)}); + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)}); + auto cpu_places = GetPlacesForCpu(option); + valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end()); + config_.set_valid_places(valid_places); + ConfigureNNAdapter(option); +} + +void LiteBackend::ConfigureAscend(const LiteBackendOption &option) { + config_.set_nnadapter_device_names({"huawei_ascend_npu"}); + std::vector valid_places; + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)}); + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)}); + auto cpu_places = GetPlacesForCpu(option); + valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end()); + config_.set_valid_places(valid_places); + ConfigureNNAdapter(option); +} + +void LiteBackend::ConfigureNNAdapter(const LiteBackendOption &option) { + if (!option.nnadapter_subgraph_partition_config_path.empty()) { + std::vector nnadapter_subgraph_partition_config_buffer; + if (ReadFile(option.nnadapter_subgraph_partition_config_path, + &nnadapter_subgraph_partition_config_buffer, false)) { + if (!nnadapter_subgraph_partition_config_buffer.empty()) { + std::string nnadapter_subgraph_partition_config_string( + nnadapter_subgraph_partition_config_buffer.data(), + nnadapter_subgraph_partition_config_buffer.size()); + config_.set_nnadapter_subgraph_partition_config_buffer( + nnadapter_subgraph_partition_config_string); + } + } + } + + if (!option.nnadapter_context_properties.empty()) { + config_.set_nnadapter_context_properties( + option.nnadapter_context_properties); + } + + if (!option.nnadapter_model_cache_dir.empty()) { + config_.set_nnadapter_model_cache_dir(option.nnadapter_model_cache_dir); + } + + if (!option.nnadapter_mixed_precision_quantization_config_path.empty()) { + config_.set_nnadapter_mixed_precision_quantization_config_path( + option.nnadapter_mixed_precision_quantization_config_path); + } + + if (!option.nnadapter_subgraph_partition_config_path.empty()) { + config_.set_nnadapter_subgraph_partition_config_path( + option.nnadapter_subgraph_partition_config_path); + } + + config_.set_nnadapter_dynamic_shape_info(option.nnadapter_dynamic_shape_info); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.cc new file mode 100755 index 0000000000..9f7167fceb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.cc @@ -0,0 +1,298 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
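Paddle Lite selects kernels following the order of the places passed to set_valid_places, which is why the Configure* helpers above push the quantized / FP16 places first and always append a kFloat place as the fallback (see also the TODO about kInt8 ordering in ConfigureKunlunXin). A condensed sketch of that convention, assuming the Paddle Lite full API headers and the FD_LITE_HOST macro defined above:

std::vector<paddle::lite_api::Place> places;
places.push_back(paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kInt8)});   // preferred: quantized kernels
places.push_back(paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFP16)});   // only when check_fp16_valid() passes
places.push_back(paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFloat)});  // FP32 fallback always last
config.set_valid_places(places);  // `config` is a paddle::lite_api::CxxConfig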
+ +#include "ultrainfer/runtime/backends/lite/lite_backend.h" +// https://github.com/PaddlePaddle/Paddle-Lite/issues/8290 +// When compiling the UltraInfer dynamic library, namely, +// WITH_STATIC_LIB=OFF, and depending on the Paddle Lite +// static library, you need to include the fake registration +// codes of Paddle Lite. When you compile the UltraInfer static +// library and depends on the Paddle Lite static library, +// WITH_STATIC_LIB=ON, you do not need to include the fake +// registration codes for Paddle Lite, but wait until you +// use the UltraInfer static library. +#if (defined(WITH_LITE_STATIC) && (!defined(WITH_STATIC_LIB))) +#warning You are compiling the UltraInfer dynamic library with \ +Paddle Lite static lib We will automatically add some registration \ +codes for ops, kernels and passes for Paddle Lite. +#include "paddle_use_kernels.h" // NOLINT +#include "paddle_use_ops.h" // NOLINT +#include "paddle_use_passes.h" // NOLINT +#endif + +#include + +namespace ultrainfer { + +void LiteBackend::BuildOption(const LiteBackendOption &option) { + option_ = option; + + if (option_.device == Device::CPU) { + ConfigureCpu(option_); + } else if (option_.device == Device::GPU) { + ConfigureGpu(option_); + } else if (option_.device == Device::TIMVX) { + ConfigureTimvx(option_); + } else if (option_.device == Device::KUNLUNXIN) { + ConfigureKunlunXin(option_); + } else if (option_.device == Device::ASCEND) { + ConfigureAscend(option_); + } + if (option_.cpu_threads > 0) { + config_.set_threads(option_.cpu_threads); + } + if (option_.power_mode > 0) { + config_.set_power_mode( + static_cast(option_.power_mode)); + } +} + +bool LiteBackend::Init(const RuntimeOption &runtime_option) { + if (initialized_) { + FDERROR << "LiteBackend is already initialized, cannot initialize again." + << std::endl; + return false; + } + + if (runtime_option.model_format != ModelFormat::PADDLE) { + FDERROR + << "PaddleLiteBackend only supports model format PADDLE, but now it's " + << runtime_option.model_format << "." << std::endl; + return false; + } + if (runtime_option.device != Device::CPU && + runtime_option.device != Device::GPU && + runtime_option.device != Device::KUNLUNXIN && + runtime_option.device != Device::ASCEND && + runtime_option.device != Device::TIMVX) { + FDERROR << "PaddleLiteBackend only supports " + "Device::CPU/Device::GPU/Device::TIMVX/Device::KUNLUNXIN/" + "Device::ASCEND, " + "but now it's " + << runtime_option.device << "." << std::endl; + return false; + } + if (runtime_option.device == Device::GPU && + !paddle::lite_api::IsOpenCLBackendValid()) { + FDERROR << "PaddleLiteBackend GPU (OpenCL) is not supported by the current " + "device." + << std::endl; + } + if (runtime_option.model_from_memory_) { + FDERROR << "PaddleLiteBackend doesn't support load model from memory, " + "please load model from disk." + << std::endl; + return false; + } + + if (runtime_option.params_file == "") { + // Use light api for Arm CPU via MobileConfig. + FDASSERT( + runtime_option.device == Device::CPU, + "In UltraInfer, Paddle Lite light API is only support for Arm CPU now!") + mobile_config_.set_model_from_file(runtime_option.model_file); + mobile_config_.set_threads(runtime_option.paddle_lite_option.cpu_threads); + mobile_config_.set_power_mode(static_cast( + runtime_option.paddle_lite_option.power_mode)); + // TODO(qiuyanjun): Add OpenCL support for mobile gpu. 
+ // Paddle-Lite/blob/develop/lite/api/tools/benchmark/benchmark.h#L265 + // mobile_config_.set_opencl_tune( + // tune_mode, opencl_cache_dir, opencl_tuned_file); + // mobile_config_.set_opencl_precision(gpu_precision); + predictor_ = + paddle::lite_api::CreatePaddlePredictor( + mobile_config_); + } else { + // Use full api for many hardwares via CxxConfig. + config_.set_model_file(runtime_option.model_file); + config_.set_param_file(runtime_option.params_file); + BuildOption(runtime_option.paddle_lite_option); + predictor_ = + paddle::lite_api::CreatePaddlePredictor( + config_); + if (option_.optimized_model_dir != "") { + FDINFO + << "Optimzed model dir is not empty, will save optimized model to: " + << option_.optimized_model_dir << std::endl; + predictor_->SaveOptimizedModel( + option_.optimized_model_dir, + paddle::lite_api::LiteModelType::kNaiveBuffer); + } + } + + inputs_desc_.clear(); + outputs_desc_.clear(); + inputs_order_.clear(); + std::vector input_names = predictor_->GetInputNames(); + std::vector output_names = predictor_->GetOutputNames(); + for (size_t i = 0; i < input_names.size(); ++i) { + inputs_order_[input_names[i]] = i; + TensorInfo info; + auto tensor = predictor_->GetInput(i); + auto shape = tensor->shape(); + info.shape.assign(shape.begin(), shape.end()); + info.name = input_names[i]; + info.dtype = LiteDataTypeToFD(tensor->precision()); + inputs_desc_.emplace_back(info); + } + for (size_t i = 0; i < output_names.size(); ++i) { + TensorInfo info; + auto tensor = predictor_->GetOutput(i); + auto shape = tensor->shape(); + info.shape.assign(shape.begin(), shape.end()); + info.name = output_names[i]; + if (option_.device != Device::KUNLUNXIN) { + info.dtype = LiteDataTypeToFD(tensor->precision()); + } + outputs_desc_.emplace_back(info); + } + + initialized_ = true; + return true; +} + +TensorInfo LiteBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()); + return inputs_desc_[index]; +} + +std::vector LiteBackend::GetInputInfos() { return inputs_desc_; } + +TensorInfo LiteBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs %d.", index, + NumOutputs()); + return outputs_desc_[index]; +} + +std::vector LiteBackend::GetOutputInfos() { return outputs_desc_; } + +bool LiteBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + if (inputs.size() != inputs_desc_.size()) { + FDERROR << "[LiteBackend] Size of inputs(" << inputs.size() + << ") should keep same with the inputs of this model(" + << inputs_desc_.size() << ")." << std::endl; + return false; + } + + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + for (size_t i = 0; i < inputs.size(); ++i) { + auto iter = inputs_order_.find(inputs[i].name); + if (iter == inputs_order_.end()) { + FDERROR << "Cannot find input with name:" << inputs[i].name + << " in loaded model." << std::endl; + return false; + } + + auto tensor = predictor_->GetInput(iter->second); + // Adjust dims only, allocate lazy. 
+ tensor->Resize(inputs[i].shape); + if (inputs[i].dtype == FDDataType::FP32) { + tensor->CopyFromCpu( + reinterpret_cast( + const_cast(inputs[i].CpuData()))); + } else if (inputs[i].dtype == FDDataType::INT32) { + tensor->CopyFromCpu( + reinterpret_cast( + const_cast(inputs[i].CpuData()))); + } else if (inputs[i].dtype == FDDataType::INT8) { + tensor->CopyFromCpu( + reinterpret_cast( + const_cast(inputs[i].CpuData()))); + } else if (inputs[i].dtype == FDDataType::UINT8) { + tensor->CopyFromCpu( + reinterpret_cast( + const_cast(inputs[i].CpuData()))); + } else if (inputs[i].dtype == FDDataType::INT64) { +#if (defined(__aarch64__) || defined(__x86_64__) || defined(_M_X64) || \ + defined(_M_ARM64)) + tensor->CopyFromCpu( + reinterpret_cast( + const_cast(inputs[i].CpuData()))); +#else + FDASSERT(false, "FDDataType::INT64 is not support for x86/armv7 now!"); +#endif + } else { + FDASSERT(false, "Unexpected data type of %d.", inputs[i].dtype); + } + } + + RUNTIME_PROFILE_LOOP_BEGIN(1) + predictor_->Run(); + RUNTIME_PROFILE_LOOP_END + + outputs->resize(outputs_desc_.size()); + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + auto tensor = predictor_->GetOutput(i); + if (outputs_desc_[i].dtype != LiteDataTypeToFD(tensor->precision())) { + outputs_desc_[i].dtype = LiteDataTypeToFD(tensor->precision()); + } + (*outputs)[i].Resize(tensor->shape(), outputs_desc_[i].dtype, + outputs_desc_[i].name); + memcpy((*outputs)[i].MutableData(), tensor->data(), + (*outputs)[i].Nbytes()); + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + return true; +} + +bool ReadFile(const std::string &filename, std::vector *contents, + bool binary) { + FILE *fp = fopen(filename.c_str(), binary ? "rb" : "r"); + if (!fp) { + FDERROR << "Cannot open file " << filename << "." << std::endl; + return false; + } + fseek(fp, 0, SEEK_END); + size_t size = ftell(fp); + fseek(fp, 0, SEEK_SET); + contents->clear(); + contents->resize(size); + size_t offset = 0; + char *ptr = reinterpret_cast(&(contents->at(0))); + while (offset < size) { + size_t already_read = fread(ptr, 1, size - offset, fp); + offset += already_read; + ptr += already_read; + } + fclose(fp); + return true; +} + +// Convert data type from paddle lite to ultrainfer +FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType &dtype) { + if (dtype == paddle::lite_api::PrecisionType::kFloat) { + return FDDataType::FP32; + } else if (dtype == paddle::lite_api::PrecisionType::kInt8) { + return FDDataType::INT8; + } else if (dtype == paddle::lite_api::PrecisionType::kInt32) { + return FDDataType::INT32; + } else if (dtype == paddle::lite_api::PrecisionType::kInt64) { + return FDDataType::INT64; + } else if (dtype == paddle::lite_api::PrecisionType::kInt16) { + return FDDataType::INT16; + } else if (dtype == paddle::lite_api::PrecisionType::kUInt8) { + return FDDataType::UINT8; + } else if (dtype == paddle::lite_api::PrecisionType::kFP64) { + return FDDataType::FP64; + } + FDASSERT(false, "Unexpected data type of %s.", + paddle::lite_api::PrecisionToStr(dtype).c_str()); + return FDDataType::FP32; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.h new file mode 100755 index 0000000000..11873eada4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/lite/lite_backend.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +#include "paddle_api.h" // NOLINT + +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/lite/option.h" +#include "ultrainfer/runtime/runtime_option.h" + +namespace ultrainfer { + +class LiteBackend : public BaseBackend { +public: + LiteBackend() {} + virtual ~LiteBackend() = default; + + bool Init(const RuntimeOption &option) override; + + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; // NOLINT + + int NumInputs() const override { return inputs_desc_.size(); } + + int NumOutputs() const override { return outputs_desc_.size(); } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + +private: + // Build CxxConfig from option for Paddle Lite full api. + void BuildOption(const LiteBackendOption &option); + // Configure many hardwares for Paddle Lite full api. + void ConfigureCpu(const LiteBackendOption &option); + void ConfigureGpu(const LiteBackendOption &option); + void ConfigureTimvx(const LiteBackendOption &option); + void ConfigureAscend(const LiteBackendOption &option); + void ConfigureKunlunXin(const LiteBackendOption &option); + void ConfigureNNAdapter(const LiteBackendOption &option); + + paddle::lite_api::CxxConfig config_; + std::shared_ptr predictor_; + paddle::lite_api::MobileConfig mobile_config_; + std::vector inputs_desc_; + std::vector outputs_desc_; + std::map inputs_order_; + LiteBackendOption option_; +}; + +// Convert data type from paddle lite to ultrainfer +FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType &dtype); + +// Helper function to read file +bool ReadFile(const std::string &filename, std::vector *contents, + bool binary = true); + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/lite/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/lite/option.h new file mode 100755 index 0000000000..d3530be27d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/lite/option.h @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
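A minimal end-to-end usage sketch of the backend declared above, assuming the ultrainfer headers are on the include path and that RuntimeOption exposes the same fields lite_backend.cc reads; the model file names are hypothetical.

#include <vector>
#include "ultrainfer/runtime/backends/lite/lite_backend.h"

int main() {
  ultrainfer::RuntimeOption option;
  option.model_file = "inference.pdmodel";     // hypothetical Paddle model files
  option.params_file = "inference.pdiparams";
  option.model_format = ultrainfer::ModelFormat::PADDLE;
  option.device = ultrainfer::Device::CPU;
  option.paddle_lite_option.cpu_threads = 4;

  ultrainfer::LiteBackend backend;
  if (!backend.Init(option)) return 1;

  std::vector<ultrainfer::FDTensor> inputs(backend.NumInputs());
  std::vector<ultrainfer::FDTensor> outputs;
  // Fill each input's name/shape/dtype/data to match backend.GetInputInfos()
  // before calling Infer; omitted here for brevity.
  return backend.Infer(inputs, &outputs) ? 0 : 1;
}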
+ +#pragma once + +#include "ultrainfer/core/fd_type.h" +// https://github.com/PaddlePaddle/Paddle-Lite/issues/8290 +#if (defined(WITH_LITE_STATIC) && defined(WITH_STATIC_LIB)) +// Whether to output some warning messages when using the +// FastDepoy static library, default OFF. These messages +// are only reserve for debugging. +#if defined(WITH_STATIC_WARNING) +#warning You are using the UltraInfer static library. We will automatically add some registration codes for ops, kernels and passes for Paddle Lite. // NOLINT +#endif +#if !defined(WITH_STATIC_LIB_AT_COMPILING) +#include "paddle_use_kernels.h" // NOLINT +#include "paddle_use_ops.h" // NOLINT +#include "paddle_use_passes.h" // NOLINT +#endif +#endif + +#include +#include +#include +#include +#include + +namespace ultrainfer { + +/*! Paddle Lite power mode for mobile device. */ +enum LitePowerMode { + LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode + LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode + LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode + LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode + LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode + LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode +}; + +/*! @brief Option object to configure Paddle Lite backend + */ +struct LiteBackendOption { + /// Paddle Lite power mode for mobile device. + int power_mode = 3; + // Number of threads while use CPU + int cpu_threads = 1; + /// Enable use half precision + bool enable_fp16 = false; + // Inference device, Paddle Lite support CPU/KUNLUNXIN/TIMVX/ASCEND + Device device = Device::CPU; + // Index of inference device + int device_id = 0; + // TODO(qiuyanjun): add opencl binary path and cache settings. 
+ std::string opencl_cache_dir = "/data/local/tmp/"; + std::string opencl_tuned_file = "/data/local/tmp/opencl_tuned_kernels.bin"; + + /// kunlunxin_l3_workspace_size + int kunlunxin_l3_workspace_size = 0xfffc00; + /// kunlunxin_locked + bool kunlunxin_locked = false; + /// kunlunxin_autotune + bool kunlunxin_autotune = true; + /// kunlunxin_autotune_file + std::string kunlunxin_autotune_file = ""; + /// kunlunxin_precision + std::string kunlunxin_precision = "int16"; + /// kunlunxin_adaptive_seqlen + bool kunlunxin_adaptive_seqlen = false; + /// kunlunxin_enable_multi_stream + bool kunlunxin_enable_multi_stream = false; + /// kunlunxin_gm_default_size + int64_t kunlunxin_gm_default_size = 0; + + /// Optimized model dir for CxxConfig + std::string optimized_model_dir = ""; + /// nnadapter_subgraph_partition_config_path + std::string nnadapter_subgraph_partition_config_path = ""; + /// nnadapter_subgraph_partition_config_buffer + std::string nnadapter_subgraph_partition_config_buffer = ""; + /// nnadapter_context_properties + std::string nnadapter_context_properties = ""; + /// nnadapter_model_cache_dir + std::string nnadapter_model_cache_dir = ""; + /// nnadapter_mixed_precision_quantization_config_path + std::string nnadapter_mixed_precision_quantization_config_path = ""; + /// nnadapter_dynamic_shape_info + std::map>> + nnadapter_dynamic_shape_info = {{"", {{0}}}}; + /// nnadapter_device_names + std::vector nnadapter_device_names = {}; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/lite/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/lite/option_pybind.cc new file mode 100755 index 0000000000..d66c65d507 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/lite/option_pybind.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
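The integer power_mode above is forwarded to Paddle Lite as a LitePowerMode (see BuildOption in lite_backend.cc), so it can be assigned from the enum directly. A short sketch of filling the most common fields, assuming this header is included:

ultrainfer::LiteBackendOption lite_option;
lite_option.cpu_threads = 2;
lite_option.power_mode = ultrainfer::LITE_POWER_HIGH;   // default is 3 (LITE_POWER_NO_BIND)
lite_option.enable_fp16 = true;                         // prefer FP16 kernels where the target supports them
lite_option.device = ultrainfer::Device::KUNLUNXIN;     // routed to ConfigureKunlunXin() at init time
lite_option.kunlunxin_l3_workspace_size = 0xfffc00;     // per-thread L3 workspace, as defaulted above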
+ +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/runtime/backends/lite/option.h" + +namespace ultrainfer { + +void BindLiteOption(pybind11::module &m) { + pybind11::class_(m, "LiteBackendOption") + .def(pybind11::init()) + .def_readwrite("power_mode", &LiteBackendOption::power_mode) + .def_readwrite("cpu_threads", &LiteBackendOption::cpu_threads) + .def_readwrite("enable_fp16", &LiteBackendOption::enable_fp16) + .def_readwrite("device", &LiteBackendOption::device) + .def_readwrite("optimized_model_dir", + &LiteBackendOption::optimized_model_dir) + .def_readwrite( + "nnadapter_subgraph_partition_config_path", + &LiteBackendOption::nnadapter_subgraph_partition_config_path) + .def_readwrite( + "nnadapter_subgraph_partition_config_buffer", + &LiteBackendOption::nnadapter_subgraph_partition_config_buffer) + .def_readwrite("nnadapter_context_properties", + &LiteBackendOption::nnadapter_context_properties) + .def_readwrite("nnadapter_model_cache_dir", + &LiteBackendOption::nnadapter_model_cache_dir) + .def_readwrite("nnadapter_mixed_precision_quantization_config_path", + &LiteBackendOption:: + nnadapter_mixed_precision_quantization_config_path) + .def_readwrite("nnadapter_dynamic_shape_info", + &LiteBackendOption::nnadapter_dynamic_shape_info) + .def_readwrite("nnadapter_device_names", + &LiteBackendOption::nnadapter_device_names) + .def_readwrite("device_id", &LiteBackendOption::device_id) + .def_readwrite("kunlunxin_l3_workspace_size", + &LiteBackendOption::kunlunxin_l3_workspace_size) + .def_readwrite("kunlunxin_locked", &LiteBackendOption::kunlunxin_locked) + .def_readwrite("kunlunxin_autotune", + &LiteBackendOption::kunlunxin_autotune) + .def_readwrite("kunlunxin_autotune_file", + &LiteBackendOption::kunlunxin_autotune_file) + .def_readwrite("kunlunxin_precision", + &LiteBackendOption::kunlunxin_precision) + .def_readwrite("kunlunxin_gm_default_size", + &LiteBackendOption::kunlunxin_gm_default_size) + .def_readwrite("kunlunxin_adaptive_seqlen", + &LiteBackendOption::kunlunxin_adaptive_seqlen) + .def_readwrite("kunlunxin_enable_multi_stream", + &LiteBackendOption::kunlunxin_enable_multi_stream); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option.h new file mode 100755 index 0000000000..55808e1ce2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option.h @@ -0,0 +1,100 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_type.h" +#include +#include +#include +#include +#include +#include +namespace ultrainfer { + +/*! 
@brief Option object to configure OpenVINO backend + */ +struct OpenVINOBackendOption { + std::string device = "CPU"; + int cpu_thread_num = -1; + + /// Number of streams while use OpenVINO + int num_streams = 1; + + /// Affinity mode + std::string affinity = "YES"; + + /// Performance hint mode + std::string hint = "UNDEFINED"; + + /** + * @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', + * 'GPU', 'GPU.1'.... + */ + void SetDevice(const std::string &name = "CPU") { device = name; } + + /** + * @brief Set shape info for OpenVINO + */ + void SetShapeInfo( + const std::map> &_shape_infos) { + shape_infos = _shape_infos; + } + + /** + * @brief While use OpenVINO backend with intel GPU, use this interface to + * specify operators run on CPU + */ + void SetCpuOperators(const std::vector &operators) { + for (const auto &op : operators) { + cpu_operators.insert(op); + } + } + + /** + * @brief Set Affinity mode + */ + void SetAffinity(const std::string &_affinity) { + FDASSERT(_affinity == "YES" || _affinity == "NO" || _affinity == "NUMA" || + _affinity == "HYBRID_AWARE", + "The affinity mode should be one of the list " + "['YES', 'NO', 'NUMA', " + "'HYBRID_AWARE'] "); + affinity = _affinity; + } + + /** + * @brief Set the Performance Hint + */ + void SetPerformanceHint(const std::string &_hint) { + FDASSERT(_hint == "LATENCY" || _hint == "THROUGHPUT" || + _hint == "CUMULATIVE_THROUGHPUT" || _hint == "UNDEFINED", + "The performance hint should be one of the list " + "['LATENCY', 'THROUGHPUT', 'CUMULATIVE_THROUGHPUT', " + "'UNDEFINED'] "); + hint = _hint; + } + + /** + * @brief Set the number of streams + */ + void SetStreamNum(int _num_streams) { + FDASSERT(_num_streams > 0, "The stream_num must be greater than 0."); + num_streams = _num_streams; + } + + std::map> shape_infos; + std::set cpu_operators{"MulticlassNms"}; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option_pybind.cc new file mode 100755 index 0000000000..50f53c3692 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/option_pybind.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
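A minimal sketch of configuring the options declared above; the input name "x" and its shape are hypothetical. Note that cpu_thread_num, num_streams and affinity only take effect while hint stays "UNDEFINED", since the backend applies them in that branch only.

ultrainfer::OpenVINOBackendOption ov_option;
ov_option.SetDevice("CPU");                         // or "AUTO", "GPU", "GPU.1", a HETERO combination, ...
ov_option.cpu_thread_num = 8;                       // mapped to INFERENCE_NUM_THREADS
ov_option.SetStreamNum(4);                          // mapped to NUM_STREAMS
ov_option.SetAffinity("NUMA");                      // one of YES / NO / NUMA / HYBRID_AWARE
ov_option.SetShapeInfo({{"x", {1, 3, 224, 224}}});  // pin shapes before the model is compiled
// ov_option.SetPerformanceHint("THROUGHPUT");      // would take precedence over the settings above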
+ +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/runtime/backends/openvino/option.h" + +namespace ultrainfer { + +void BindOpenVINOOption(pybind11::module &m) { + pybind11::class_(m, "OpenVINOBackendOption") + .def(pybind11::init()) + .def_readwrite("cpu_thread_num", &OpenVINOBackendOption::cpu_thread_num) + .def_readwrite("num_streams", &OpenVINOBackendOption::num_streams) + .def_readwrite("affinity", &OpenVINOBackendOption::affinity) + .def_readwrite("hint", &OpenVINOBackendOption::hint) + .def("set_device", &OpenVINOBackendOption::SetDevice) + .def("set_shape_info", &OpenVINOBackendOption::SetShapeInfo) + .def("set_cpu_operators", &OpenVINOBackendOption::SetCpuOperators) + .def("set_affinity", &OpenVINOBackendOption::SetAffinity) + .def("set_performance_hint", &OpenVINOBackendOption::SetPerformanceHint) + .def("set_stream_num", &OpenVINOBackendOption::SetStreamNum); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.cc new file mode 100755 index 0000000000..5d4163acfb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.cc @@ -0,0 +1,457 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/runtime/backends/openvino/ov_backend.h" +#ifdef ENABLE_PADDLE2ONNX +#include "paddle2onnx/converter.h" +#endif + +namespace ultrainfer { + +std::vector PartialShapeToVec(const ov::PartialShape &shape) { + std::vector res; + for (int i = 0; i < shape.size(); ++i) { + auto dim = shape[i]; + if (dim.is_dynamic()) { + res.push_back(-1); + } else { + res.push_back(dim.get_length()); + } + } + return res; +} + +ov::PartialShape VecToPartialShape(const std::vector &shape) { + std::vector dims; + for (size_t i = 0; i < shape.size(); ++i) { + dims.emplace_back(ov::Dimension(shape[i])); + } + return ov::PartialShape(dims); +} + +FDDataType OpenVINODataTypeToFD(const ov::element::Type &type) { + if (type == ov::element::f32) { + return FDDataType::FP32; + } else if (type == ov::element::f16) { + return FDDataType::FP16; + } else if (type == ov::element::f64) { + return FDDataType::FP64; + } else if (type == ov::element::i8) { + return FDDataType::INT8; + } else if (type == ov::element::u8) { + return FDDataType::UINT8; + } else if (type == ov::element::i32) { + return FDDataType::INT32; + } else if (type == ov::element::i64) { + return FDDataType::INT64; + } else { + FDASSERT(false, "Only support float/double/int8/int32/int64/float16 now."); + } + return FDDataType::FP32; +} + +ov::element::Type FDDataTypeToOV(const FDDataType &type) { + if (type == FDDataType::FP32) { + return ov::element::f32; + } else if (type == FDDataType::FP64) { + return ov::element::f64; + } else if (type == FDDataType::INT8) { + return ov::element::i8; + } else if (type == FDDataType::UINT8) { + return ov::element::u8; + } else if (type == FDDataType::INT32) { + return ov::element::i32; + } else if (type == FDDataType::INT64) { + return ov::element::i64; + } else if (type == FDDataType::FP16) { + return ov::element::f16; + } + FDASSERT(false, + "Only support float/double/int8/uint8/int32/int64/float16 now."); + return ov::element::f32; +} + +ov::Core OpenVINOBackend::core_; + +void OpenVINOBackend::InitTensorInfo( + const std::vector> &ov_outputs, + std::map *tensor_infos) { + for (size_t i = 0; i < ov_outputs.size(); ++i) { + TensorInfo info; + auto partial_shape = PartialShapeToVec(ov_outputs[i].get_partial_shape()); + info.shape.assign(partial_shape.begin(), partial_shape.end()); + info.name = ov_outputs[i].get_any_name(); + info.dtype = OpenVINODataTypeToFD(ov_outputs[i].get_element_type()); + tensor_infos->insert(std::make_pair(info.name, info)); + } +} + +bool OpenVINOBackend::Init(const RuntimeOption &option) { + if (option.model_from_memory_) { + FDERROR << "OpenVINOBackend doesn't support load model from memory, please " + "load model from disk." + << std::endl; + return false; + } + if (option.device != Device::CPU) { + FDERROR << "OpenVINOBackend only supports Device::CPU, but now its " + << option.device << "." 
<< std::endl; + return false; + } + + if (option.model_format == ModelFormat::PADDLE) { + return InitFromPaddle(option.model_file, option.params_file, + option.openvino_option); + } else if (option.model_format == ModelFormat::ONNX) { + return InitFromOnnx(option.model_file, option.openvino_option); + } else { + FDERROR << "OpenVINOBackend only supports model format Paddle/ONNX, but " + "now its " + << option.model_format << std::endl; + return false; + } + return false; +} + +bool OpenVINOBackend::InitFromPaddle(const std::string &model_file, + const std::string ¶ms_file, + const OpenVINOBackendOption &option) { + if (initialized_) { + FDERROR << "OpenVINOBackend is already initlized, cannot initialize again." + << std::endl; + return false; + } + option_ = option; + + std::shared_ptr model = core_.read_model(model_file, params_file); + if (option_.shape_infos.size() > 0) { + std::map shape_infos; + for (const auto &item : option_.shape_infos) { + shape_infos[item.first] = VecToPartialShape(item.second); + } + model->reshape(shape_infos); + } + + if (option_.device.find("HETERO") != std::string::npos) { + auto supported_ops = core_.query_model(model, option_.device); + for (auto &&op : model->get_ops()) { + auto &affinity = supported_ops[op->get_friendly_name()]; + if (option_.cpu_operators.find(op->description()) != + option_.cpu_operators.end()) { + op->get_rt_info()["affinity"] = "CPU"; + } else { + op->get_rt_info()["affinity"] = affinity; + } + } + } + + // Get inputs/outputs information from loaded model + const std::vector> inputs = model->inputs(); + std::map input_infos; + InitTensorInfo(inputs, &input_infos); + + const std::vector> outputs = model->outputs(); + std::map output_infos; + InitTensorInfo(outputs, &output_infos); + + // OpenVINO model may not keep the same order with original model + // So here will reorder it's inputs and outputs + std::string model_content; + ReadBinaryFromFile(model_file, &model_content); + auto reader = + paddle2onnx::PaddleReader(model_content.c_str(), model_content.size()); + if (reader.num_inputs != input_infos.size()) { + FDERROR << "The number of inputs from PaddleReader:" << reader.num_inputs + << " not equal to the number of inputs from OpenVINO:" + << input_infos.size() << "." << std::endl; + return false; + } + if (reader.num_outputs != output_infos.size()) { + FDERROR << "The number of outputs from PaddleReader:" << reader.num_outputs + << " not equal to the number of outputs from OpenVINO:" + << output_infos.size() << "." << std::endl; + return false; + } + for (int i = 0; i < reader.num_inputs; ++i) { + auto iter = input_infos.find(std::string(reader.inputs[i].name)); + if (iter == input_infos.end()) { + FDERROR << "Cannot find input name:" << reader.inputs[i].name + << " from OpenVINO model." << std::endl; + return false; + } + input_infos_.push_back(iter->second); + } + for (int i = 0; i < reader.num_outputs; ++i) { + auto iter = output_infos.find(std::string(reader.outputs[i].name)); + if (iter == output_infos.end()) { + FDERROR << "Cannot find output name:" << reader.outputs[i].name + << " from OpenVINO model." 
<< std::endl; + return false; + } + output_infos_.push_back(iter->second); + } + + ov::AnyMap properties; + if (option_.hint == "UNDEFINED") { + if (option_.device == "CPU" && option_.cpu_thread_num > 0) { + properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; + } + if (option_.num_streams == -1) { + properties["NUM_STREAMS"] = ov::streams::AUTO; + } else if (option_.num_streams == -2) { + properties["NUM_STREAMS"] = ov::streams::NUMA; + } else if (option_.num_streams > 0) { + properties["NUM_STREAMS"] = option_.num_streams; + } + + FDINFO << "number of streams:" << option_.num_streams << "." << std::endl; + if (option_.affinity == "YES") { + properties["AFFINITY"] = "CORE"; + } else if (option_.affinity == "NO") { + properties["AFFINITY"] = "NONE"; + } else if (option_.affinity == "NUMA") { + properties["AFFINITY"] = "NUMA"; + } else if (option_.affinity == "HYBRID_AWARE") { + properties["AFFINITY"] = "HYBRID_AWARE"; + } + FDINFO << "affinity:" << option_.affinity << "." << std::endl; + } else if (option_.hint == "LATENCY") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); + } else if (option_.hint == "THROUGHPUT") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); + } else if (option_.hint == "CUMULATIVE_THROUGHPUT") { + properties.emplace(ov::hint::performance_mode( + ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)); + } + + FDINFO << "Compile OpenVINO model on device_name:" << option.device << "." + << std::endl; + + compiled_model_ = core_.compile_model(model, option.device, properties); + + request_ = compiled_model_.create_infer_request(); + initialized_ = true; + return true; +} + +TensorInfo OpenVINOBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of outputs: %d.", index, + NumOutputs()); + return input_infos_[index]; +} + +std::vector OpenVINOBackend::GetInputInfos() { + return input_infos_; +} + +std::vector OpenVINOBackend::GetOutputInfos() { + return output_infos_; +} + +TensorInfo OpenVINOBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs: %d.", index, + NumOutputs()); + return output_infos_[index]; +} + +bool OpenVINOBackend::InitFromOnnx(const std::string &model_file, + const OpenVINOBackendOption &option) { + if (initialized_) { + FDERROR << "OpenVINOBackend is already initlized, cannot initialize again." 
+ << std::endl; + return false; + } + option_ = option; + + std::shared_ptr model = core_.read_model(model_file); + if (option_.shape_infos.size() > 0) { + std::map shape_infos; + for (const auto &item : option_.shape_infos) { + shape_infos[item.first] = VecToPartialShape(item.second); + } + model->reshape(shape_infos); + } + + if (option_.device.find("HETERO") != std::string::npos) { + auto supported_ops = core_.query_model(model, option_.device); + for (auto &&op : model->get_ops()) { + auto &affinity = supported_ops[op->get_friendly_name()]; + if (option_.cpu_operators.find(op->description()) != + option_.cpu_operators.end()) { + op->get_rt_info()["affinity"] = "CPU"; + } else { + op->get_rt_info()["affinity"] = affinity; + } + } + } + + // Get inputs/outputs information from loaded model + const std::vector> inputs = model->inputs(); + std::map input_infos; + InitTensorInfo(inputs, &input_infos); + + const std::vector> outputs = model->outputs(); + std::map output_infos; + InitTensorInfo(outputs, &output_infos); + + // OpenVINO model may not keep the same order with original model + // So here will reorder it's inputs and outputs + std::string model_content; + ReadBinaryFromFile(model_file, &model_content); + auto reader = + paddle2onnx::OnnxReader(model_content.c_str(), model_content.size()); + if (reader.num_inputs != input_infos.size()) { + FDERROR << "The number of inputs from OnnxReader:" << reader.num_inputs + << " not equal to the number of inputs from OpenVINO:" + << input_infos.size() << "." << std::endl; + return false; + } + if (reader.num_outputs != output_infos.size()) { + FDERROR << "The number of outputs from OnnxReader:" << reader.num_outputs + << " not equal to the number of outputs from OpenVINO:" + << output_infos.size() << "." << std::endl; + return false; + } + for (int i = 0; i < reader.num_inputs; ++i) { + auto iter = input_infos.find(std::string(reader.inputs[i].name)); + if (iter == input_infos.end()) { + FDERROR << "Cannot find input name:" << reader.inputs[i].name + << " from OpenVINO model." << std::endl; + return false; + } + input_infos_.push_back(iter->second); + } + for (int i = 0; i < reader.num_outputs; ++i) { + auto iter = output_infos.find(std::string(reader.outputs[i].name)); + if (iter == output_infos.end()) { + FDERROR << "Cannot find output name:" << reader.outputs[i].name + << " from OpenVINO model." << std::endl; + return false; + } + output_infos_.push_back(iter->second); + } + + ov::AnyMap properties; + if (option_.hint == "UNDEFINED") { + if (option_.device == "CPU" && option_.cpu_thread_num > 0) { + properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; + } + if (option_.num_streams == -1) { + properties["NUM_STREAMS"] = ov::streams::AUTO; + } else if (option_.num_streams == -2) { + properties["NUM_STREAMS"] = ov::streams::NUMA; + } else if (option_.num_streams > 0) { + properties["NUM_STREAMS"] = option_.num_streams; + } + + FDINFO << "number of streams:" << option_.num_streams << "." << std::endl; + if (option_.affinity == "YES") { + properties["AFFINITY"] = "CORE"; + } else if (option_.affinity == "NO") { + properties["AFFINITY"] = "NONE"; + } else if (option_.affinity == "NUMA") { + properties["AFFINITY"] = "NUMA"; + } else if (option_.affinity == "HYBRID_AWARE") { + properties["AFFINITY"] = "HYBRID_AWARE"; + } + FDINFO << "affinity:" << option_.affinity << "." 
<< std::endl; + } else if (option_.hint == "LATENCY") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); + } else if (option_.hint == "THROUGHPUT") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); + } else if (option_.hint == "CUMULATIVE_THROUGHPUT") { + properties.emplace(ov::hint::performance_mode( + ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)); + } + + FDINFO << "Compile OpenVINO model on device_name:" << option.device << "." + << std::endl; + compiled_model_ = core_.compile_model(model, option.device, properties); + + request_ = compiled_model_.create_infer_request(); + + initialized_ = true; + return true; +} + +int OpenVINOBackend::NumInputs() const { return input_infos_.size(); } + +int OpenVINOBackend::NumOutputs() const { return output_infos_.size(); } + +bool OpenVINOBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + if (inputs.size() != input_infos_.size()) { + FDERROR << "[OpenVINOBackend] Size of the inputs(" << inputs.size() + << ") should keep same with the inputs of this model(" + << input_infos_.size() << ")." << std::endl; + return false; + } + + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + for (size_t i = 0; i < inputs.size(); ++i) { + ov::Shape shape(inputs[i].shape.begin(), inputs[i].shape.end()); + ov::Tensor ov_tensor(FDDataTypeToOV(inputs[i].dtype), shape, + inputs[i].Data()); + request_.set_tensor(inputs[i].name, ov_tensor); + } + + RUNTIME_PROFILE_LOOP_BEGIN(1) + request_.start_async(); + request_.wait(); + RUNTIME_PROFILE_LOOP_END + + outputs->resize(output_infos_.size()); + for (size_t i = 0; i < output_infos_.size(); ++i) { + auto out_tensor = request_.get_output_tensor(i); + auto out_tensor_shape = out_tensor.get_shape(); + std::vector shape(out_tensor_shape.begin(), + out_tensor_shape.end()); + if (copy_to_fd) { + (*outputs)[i].Resize(shape, + OpenVINODataTypeToFD(out_tensor.get_element_type()), + output_infos_[i].name, Device::CPU); + memcpy((*outputs)[i].MutableData(), out_tensor.data(), + (*outputs)[i].Nbytes()); + } else { + (*outputs)[i].name = output_infos_[i].name; + (*outputs)[i].SetExternalData( + shape, OpenVINODataTypeToFD(out_tensor.get_element_type()), + out_tensor.data(), Device::CPU); + } + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + return true; +} + +std::unique_ptr +OpenVINOBackend::Clone(RuntimeOption &runtime_option, void *stream, + int device_id) { + std::unique_ptr new_backend = + utils::make_unique(); + auto casted_backend = dynamic_cast(new_backend.get()); + casted_backend->option_ = option_; + casted_backend->request_ = compiled_model_.create_infer_request(); + casted_backend->input_infos_.assign(input_infos_.begin(), input_infos_.end()); + casted_backend->output_infos_.assign(output_infos_.begin(), + output_infos_.end()); + return new_backend; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.h new file mode 100755 index 0000000000..7119d60549 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/openvino/ov_backend.h @@ -0,0 +1,72 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +#include "openvino/openvino.hpp" +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/openvino/option.h" +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { + +class OpenVINOBackend : public BaseBackend { +public: + static ov::Core core_; + OpenVINOBackend() {} + virtual ~OpenVINOBackend() = default; + + bool Init(const RuntimeOption &option); + + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + + int NumInputs() const override; + + int NumOutputs() const override; + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + + std::unique_ptr Clone(RuntimeOption &runtime_option, + void *stream = nullptr, + int device_id = -1) override; + +private: + bool + InitFromPaddle(const std::string &model_file, const std::string ¶ms_file, + const OpenVINOBackendOption &option = OpenVINOBackendOption()); + + bool + InitFromOnnx(const std::string &model_file, + const OpenVINOBackendOption &option = OpenVINOBackendOption()); + + void InitTensorInfo(const std::vector> &ov_outputs, + std::map *tensor_infos); + + ov::CompiledModel compiled_model_; + ov::InferRequest request_; + OpenVINOBackendOption option_; + std::vector input_infos_; + std::vector output_infos_; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.cc b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.cc new file mode 100755 index 0000000000..c09a2288a2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
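A brief sketch of the Clone() contract implemented in ov_backend.cc above: a clone copies the option and tensor descriptions and creates a fresh ov::InferRequest on the already compiled model, so compilation is not repeated. Here backend and runtime_option are assumed to come from an earlier successful Init, and inputs is a prepared std::vector<ultrainfer::FDTensor>.

// One request per worker; the compiled model itself is reused by the clone.
std::unique_ptr<ultrainfer::BaseBackend> worker =
    backend.Clone(runtime_option);           // stream / device_id keep their defaults
std::vector<ultrainfer::FDTensor> worker_outputs;
worker->Infer(inputs, &worker_outputs);      // independent request_, same compiled_model_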
+ +#ifndef NON_64_PLATFORM + +#include "adaptive_pool2d.h" + +namespace ultrainfer { + +void AdaptivePool2dKernel::CpuAdaptivePool( + const std::vector &input_size, + const std::vector &output_size, const float *input_data, + float *output_data) { + int64_t in_bc_offset = input_size[2] * input_size[3]; + int64_t out_bc_offset = output_size[2] * output_size[3]; + for (int64_t b = 0; b < output_size[0]; b++) { + for (int64_t c = 0; c < output_size[1]; c++) { + for (int64_t h = 0; h < output_size[2]; h++) { + int64_t hstart = + std::floor(static_cast(h * input_size[2]) / output_size[2]); + int64_t hend = std::ceil(static_cast((h + 1) * input_size[2]) / + output_size[2]); + for (int64_t w = 0; w < output_size[3]; w++) { + int64_t wstart = std::floor(static_cast(w * input_size[3]) / + output_size[3]); + int64_t wend = std::ceil(static_cast((w + 1) * input_size[3]) / + output_size[3]); + int64_t out_offset = h * output_size[3] + w; + output_data[out_offset] = 0; + for (auto i = hstart; i < hend; i++) { + for (auto j = wstart; j < wend; j++) { + if (pooling_type_ == "avg") { + output_data[out_offset] += input_data[i * input_size[3] + j]; + } + if (pooling_type_ == "max") { + output_data[out_offset] = std::max( + output_data[out_offset], input_data[i * input_size[3] + j]); + } + } + } + if (pooling_type_ == "avg") { + output_data[out_offset] /= ((hend - hstart) * (wend - wstart)); + } + } + } + output_data += out_bc_offset; + input_data += in_bc_offset; + } + } +} + +void AdaptivePool2dKernel::Compute(OrtKernelContext *context) { +#if ORT_API_VERSION >= 14 + Ort::KernelContext ort_context{context}; + Ort::ConstValue input = ort_context.GetInput(0); +#else + Ort::CustomOpApi api{ort_}; + Ort::Unowned input{ + const_cast(api.KernelContext_GetInput(context, 0))}; +#endif + auto input_data = input.GetTensorData(); + auto input_dim = input.GetTensorTypeAndShapeInfo().GetShape(); + + output_size_[0] = input_dim[0]; + std::vector input_size; + for (auto i : input_dim) { + input_size.push_back(i); + } + +#if ORT_API_VERSION >= 14 + auto output = ort_context.GetOutput(0, output_size_); +#else + Ort::Unowned output{api.KernelContext_GetOutput( + context, 0, output_size_.data(), output_size_.size())}; +#endif + float *output_data = output.GetTensorMutableData(); + if (!strcmp(this->provider_, "CUDAExecutionProvider")) { +#ifdef WITH_GPU + auto compute_stream = +#if ORT_API_VERSION >= 14 + ort_context.GetGPUComputeStream(); +#else + api.KernelContext_GetGPUComputeStream(context); +#endif + CudaAdaptivePool(input_size, output_size_, output_data, input_data, + compute_stream, pooling_type_); +#else + FDWARNING << "UltraInfer didn't compile with WITH_GPU. " + << "Will force to use CPU to run." 
<< std::endl; + CpuAdaptivePool(input_size, output_size_, input_data, output_data); +#endif + } else { + CpuAdaptivePool(input_size, output_size_, input_data, output_data); + } +} + +void AdaptivePool2dKernel::GetAttribute(const OrtKernelInfo *info) { +#if ORT_API_VERSION >= 14 + Ort::ConstKernelInfo ort_info{info}; + pooling_type_ = ort_info.GetAttribute("pooling_type"); + output_size_ = ort_info.GetAttributes("output_size"); +#else + Ort::CustomOpApi api{ort_}; + pooling_type_ = api.KernelInfoGetAttribute(info, "pooling_type"); + output_size_ = + api.KernelInfoGetAttribute>(info, "output_size"); +#endif + FDASSERT(output_size_.size() == 4 && output_size_[2] > 0 && + output_size_[3] > 0, + "The output size of adaptive pool must be positive."); +} +} // namespace ultrainfer + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.h b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.h new file mode 100755 index 0000000000..643d0b7537 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.h @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include +#include +#include +#include + +#ifndef NON_64_PLATFORM +#include "onnxruntime_cxx_api.h" // NOLINT + +#ifdef WITH_GPU +#include "ultrainfer/runtime/backends/common/cuda/adaptive_pool2d_kernel.h" +#endif + +namespace ultrainfer { +struct AdaptivePool2dKernel { +protected: + std::string pooling_type_ = "avg"; + std::vector output_size_ = {}; + OrtApi ort_; + void *compute_stream_; + const char *provider_; + +public: + AdaptivePool2dKernel(OrtApi ort, const OrtKernelInfo *info, + const char *provider) + : ort_(ort) { + GetAttribute(info); + provider_ = provider; + } + + void GetAttribute(const OrtKernelInfo *info); + + void Compute(OrtKernelContext *context); + + void CpuAdaptivePool(const std::vector &input_size, + const std::vector &output_size, + const float *input_data, float *output_data); +}; + +struct AdaptivePool2dOp + : Ort::CustomOpBase { + explicit AdaptivePool2dOp(const char *provider) : provider_(provider) {} + void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const { + return new AdaptivePool2dKernel(api, info, provider_); + } + + const char *GetName() const { return "AdaptivePool2d"; } + + size_t GetInputTypeCount() const { return 1; } + + ONNXTensorElementDataType GetInputType(size_t index) const { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + } + + size_t GetOutputTypeCount() const { return 1; } + + ONNXTensorElementDataType GetOutputType(size_t index) const { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + } + + const char *GetExecutionProviderType() const { return provider_; } + +private: + const char *provider_; +}; + +} // namespace ultrainfer + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.cc 
b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.cc new file mode 100755 index 0000000000..26f0686edc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.cc @@ -0,0 +1,287 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NON_64_PLATFORM + +#include "ultrainfer/runtime/backends/ort/ops/multiclass_nms.h" + +#include + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +template +bool SortScorePairDescend(const std::pair &pair1, + const std::pair &pair2) { + return pair1.first > pair2.first; +} + +void GetMaxScoreIndex(const float *scores, const int &score_size, + const float &threshold, const int &top_k, + std::vector> *sorted_indices) { + for (size_t i = 0; i < score_size; ++i) { + if (scores[i] > threshold) { + sorted_indices->push_back(std::make_pair(scores[i], i)); + } + } + // Sort the score pair according to the scores in descending order + std::stable_sort(sorted_indices->begin(), sorted_indices->end(), + SortScorePairDescend); + // Keep top_k scores if needed. + if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { + sorted_indices->resize(top_k); + } +} + +float BBoxArea(const float *box, const bool &normalized) { + if (box[2] < box[0] || box[3] < box[1]) { + // If coordinate values are is invalid + // (e.g. xmax < xmin or ymax < ymin), return 0. + return 0.f; + } else { + const float w = box[2] - box[0]; + const float h = box[3] - box[1]; + if (normalized) { + return w * h; + } else { + // If coordinate values are not within range [0, 1]. + return (w + 1) * (h + 1); + } + } +} + +float JaccardOverlap(const float *box1, const float *box2, + const bool &normalized) { + if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || + box2[3] < box1[1]) { + return 0.f; + } else { + const float inter_xmin = std::max(box1[0], box2[0]); + const float inter_ymin = std::max(box1[1], box2[1]); + const float inter_xmax = std::min(box1[2], box2[2]); + const float inter_ymax = std::min(box1[3], box2[3]); + float norm = normalized ? 
0.0f : 1.0f; + float inter_w = inter_xmax - inter_xmin + norm; + float inter_h = inter_ymax - inter_ymin + norm; + const float inter_area = inter_w * inter_h; + const float bbox1_area = BBoxArea(box1, normalized); + const float bbox2_area = BBoxArea(box2, normalized); + return inter_area / (bbox1_area + bbox2_area - inter_area); + } +} + +void MultiClassNmsKernel::FastNMS(const float *boxes, const float *scores, + const int &num_boxes, + std::vector *keep_indices) { + std::vector> sorted_indices; + GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k, + &sorted_indices); + + float adaptive_threshold = nms_threshold; + while (sorted_indices.size() != 0) { + const int idx = sorted_indices.front().second; + bool keep = true; + for (size_t k = 0; k < keep_indices->size(); ++k) { + if (!keep) { + break; + } + const int kept_idx = (*keep_indices)[k]; + float overlap = + JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized); + keep = overlap <= adaptive_threshold; + } + if (keep) { + keep_indices->push_back(idx); + } + sorted_indices.erase(sorted_indices.begin()); + if (keep && nms_eta<1.0 & adaptive_threshold> 0.5) { + adaptive_threshold *= nms_eta; + } + } +} + +int MultiClassNmsKernel::NMSForEachSample( + const float *boxes, const float *scores, int num_boxes, int num_classes, + std::map> *keep_indices) { + for (int i = 0; i < num_classes; ++i) { + if (i == background_label) { + continue; + } + const float *score_for_class_i = scores + i * num_boxes; + FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i])); + } + int num_det = 0; + for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) { + num_det += iter->second.size(); + } + + if (keep_top_k > -1 && num_det > keep_top_k) { + std::vector>> score_index_pairs; + for (const auto &it : *keep_indices) { + int label = it.first; + const float *current_score = scores + label * num_boxes; + auto &label_indices = it.second; + for (size_t j = 0; j < label_indices.size(); ++j) { + int idx = label_indices[j]; + score_index_pairs.push_back( + std::make_pair(current_score[idx], std::make_pair(label, idx))); + } + } + std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), + SortScorePairDescend>); + score_index_pairs.resize(keep_top_k); + + std::map> new_indices; + for (size_t j = 0; j < score_index_pairs.size(); ++j) { + int label = score_index_pairs[j].second.first; + int idx = score_index_pairs[j].second.second; + new_indices[label].push_back(idx); + } + new_indices.swap(*keep_indices); + num_det = keep_top_k; + } + return num_det; +} + +void MultiClassNmsKernel::Compute(OrtKernelContext *context) { +#if ORT_API_VERSION >= 14 + Ort::KernelContext ort_context{context}; + Ort::ConstValue boxes = ort_context.GetInput(0); + Ort::ConstValue scores = ort_context.GetInput(1); +#else + Ort::CustomOpApi api{ort_}; + Ort::Unowned boxes{ + const_cast(api.KernelContext_GetInput(context, 0))}; + Ort::Unowned scores{ + const_cast(api.KernelContext_GetInput(context, 1))}; +#endif + + auto boxes_data = boxes.GetTensorData(); + auto scores_data = scores.GetTensorData(); + + auto boxes_dim = boxes.GetTensorTypeAndShapeInfo().GetShape(); + auto scores_dim = scores.GetTensorTypeAndShapeInfo().GetShape(); + + int score_size = scores_dim.size(); + + int64_t batch_size = scores_dim[0]; + int64_t box_dim = boxes_dim[2]; + int64_t out_dim = box_dim + 2; + + int num_nmsed_out = 0; + FDASSERT(score_size == 3, + "Require rank of input scores be 3, but now it's %d.", score_size); + FDASSERT(boxes_dim[2] == 4, + 
"Require the 3-dimension of input boxes be 4, but now it's %ld.", + box_dim); + std::vector out_num_rois_dims = {batch_size}; +#if ORT_API_VERSION >= 14 + auto out_num_rois = ort_context.GetOutput(2, out_num_rois_dims); +#else + Ort::Unowned out_num_rois{api.KernelContext_GetOutput( + context, 2, out_num_rois_dims.data(), out_num_rois_dims.size())}; +#endif + int32_t *out_num_rois_data = out_num_rois.GetTensorMutableData(); + + std::vector>> all_indices; + for (size_t i = 0; i < batch_size; ++i) { + std::map> indices; // indices kept for each class + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr, + boxes_dim[1], scores_dim[1], &indices); + num_nmsed_out += num; + out_num_rois_data[i] = num; + all_indices.emplace_back(indices); + } + std::vector out_box_dims = {num_nmsed_out, 6}; + std::vector out_index_dims = {num_nmsed_out, 1}; + +#if ORT_API_VERSION >= 14 + auto out_box = ort_context.GetOutput(0, out_box_dims); + auto out_index = ort_context.GetOutput(1, out_index_dims); +#else + Ort::Unowned out_box{api.KernelContext_GetOutput( + context, 0, out_box_dims.data(), out_box_dims.size())}; + Ort::Unowned out_index{api.KernelContext_GetOutput( + context, 1, out_index_dims.data(), out_index_dims.size())}; +#endif + + if (num_nmsed_out == 0) { + int32_t *out_num_rois_data = out_num_rois.GetTensorMutableData(); + for (size_t i = 0; i < batch_size; ++i) { + out_num_rois_data[i] = 0; + } + return; + } + float *out_box_data = out_box.GetTensorMutableData(); + int32_t *out_index_data = out_index.GetTensorMutableData(); + + int count = 0; + for (size_t i = 0; i < batch_size; ++i) { + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + for (const auto &it : all_indices[i]) { + int label = it.first; + const auto &indices = it.second; + const float *current_scores_class_ptr = + current_scores_ptr + label * scores_dim[2]; + for (size_t j = 0; j < indices.size(); ++j) { + int start = count * 6; + out_box_data[start] = label; + out_box_data[start + 1] = current_scores_class_ptr[indices[j]]; + + out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4]; + out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1]; + out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2]; + + out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3]; + out_index_data[count] = i * boxes_dim[1] + indices[j]; + count += 1; + } + } + } +} + +void MultiClassNmsKernel::GetAttribute(const OrtKernelInfo *info) { +#if ORT_API_VERSION >= 14 + Ort::ConstKernelInfo ort_info{info}; + background_label = ort_info.GetAttribute("background_label"); + keep_top_k = ort_info.GetAttribute("keep_top_k"); + nms_eta = ort_info.GetAttribute("nms_eta"); + nms_threshold = ort_info.GetAttribute("nms_threshold"); + nms_top_k = ort_info.GetAttribute("nms_top_k"); + normalized = ort_info.GetAttribute("normalized"); + score_threshold = ort_info.GetAttribute("score_threshold"); +#else + Ort::CustomOpApi api{ort_}; + background_label = + api.KernelInfoGetAttribute(info, "background_label"); + keep_top_k = api.KernelInfoGetAttribute(info, "keep_top_k"); + nms_eta = api.KernelInfoGetAttribute(info, "nms_eta"); + nms_threshold = api.KernelInfoGetAttribute(info, "nms_threshold"); + nms_top_k = api.KernelInfoGetAttribute(info, "nms_top_k"); 
+ normalized = api.KernelInfoGetAttribute(info, "normalized"); + score_threshold = api.KernelInfoGetAttribute(info, "score_threshold"); +#endif +} +} // namespace ultrainfer + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.h b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.h new file mode 100755 index 0000000000..1225467f80 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ops/multiclass_nms.h @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#ifndef NON_64_PLATFORM +#include "onnxruntime_cxx_api.h" // NOLINT + +namespace ultrainfer { + +struct MultiClassNmsKernel { +protected: + int64_t background_label = -1; + int64_t keep_top_k = -1; + float nms_eta; + float nms_threshold = 0.7; + int64_t nms_top_k; + bool normalized; + float score_threshold; + OrtApi ort_; + +public: + MultiClassNmsKernel(OrtApi ort, const OrtKernelInfo *info) : ort_(ort) { + GetAttribute(info); + } + + void GetAttribute(const OrtKernelInfo *info); + + void Compute(OrtKernelContext *context); + void FastNMS(const float *boxes, const float *scores, const int &num_boxes, + std::vector *keep_indices); + int NMSForEachSample(const float *boxes, const float *scores, int num_boxes, + int num_classes, + std::map> *keep_indices); +}; + +struct MultiClassNmsOp + : Ort::CustomOpBase { + void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const { + return new MultiClassNmsKernel(api, info); + } + + const char *GetName() const { return "MultiClassNMS"; } + + size_t GetInputTypeCount() const { return 2; } + + ONNXTensorElementDataType GetInputType(size_t index) const { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + } + + size_t GetOutputTypeCount() const { return 3; } + + ONNXTensorElementDataType GetOutputType(size_t index) const { + if (index == 0) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + } + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; + } + + const char *GetExecutionProviderType() const { + return "CPUExecutionProvider"; + } +}; + +} // namespace ultrainfer + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/ort/option.h new file mode 100755 index 0000000000..3916b7613b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/option.h @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/runtime/enum_variables.h" +#include +#include +#include +#include +#include +namespace ultrainfer { + +/*! @brief Option object to configure ONNX Runtime backend + */ +struct OrtBackendOption { + /// Level of graph optimization, + /// /-1: mean default(Enable all the optimization strategy) + /// /0: disable all the optimization strategy/1: enable basic strategy + /// /2:enable extend strategy/99: enable all + int graph_optimization_level = -1; + /// Number of threads to execute the operator, -1: default + int intra_op_num_threads = -1; + /// Number of threads to execute the graph, + /// -1: default. This parameter only will bring effects + /// while the `OrtBackendOption::execution_mode` set to 1. + int inter_op_num_threads = -1; + /// Execution mode for the graph, -1: default(Sequential mode) + /// /0: Sequential mode, execute the operators in graph one by one. + /// /1: Parallel mode, execute the operators in graph parallelly. + int execution_mode = -1; + /// Inference device, OrtBackend supports CPU/GPU + Device device = Device::CPU; + /// Inference device id + int device_id = 0; + void *external_stream_ = nullptr; + /// Use fp16 to infer + bool enable_fp16 = false; + + std::vector ort_disabled_ops_{}; + void DisableOrtFP16OpTypes(const std::vector &ops) { + ort_disabled_ops_.insert(ort_disabled_ops_.end(), ops.begin(), ops.end()); + } +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/ort/option_pybind.cc new file mode 100755 index 0000000000..be96da8abf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/option_pybind.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
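The OrtBackendOption struct above collects all tunables for the ONNX Runtime backend. As a minimal sketch (not part of the patch) of how such an option might be filled in before handing it to the backend; field names follow the struct above, and the FP16-exempt op names are placeholders:

#include "ultrainfer/runtime/backends/ort/option.h"

ultrainfer::OrtBackendOption MakeGpuFp16Option() {
  ultrainfer::OrtBackendOption opt;
  opt.graph_optimization_level = 99;      // enable all graph optimizations
  opt.intra_op_num_threads = 4;           // threads used inside an operator
  opt.execution_mode = 0;                 // sequential operator execution
  opt.device = ultrainfer::Device::GPU;
  opt.device_id = 0;
  opt.enable_fp16 = true;
  // Keep numerically sensitive op types in FP32 during FP16 conversion
  // (the op names here are illustrative placeholders).
  opt.DisableOrtFP16OpTypes({"Softmax", "LayerNormalization"});
  return opt;
}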
+ +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/runtime/backends/ort/option.h" + +namespace ultrainfer { + +void BindOrtOption(pybind11::module &m) { + pybind11::class_(m, "OrtBackendOption") + .def(pybind11::init()) + .def_readwrite("graph_optimization_level", + &OrtBackendOption::graph_optimization_level) + .def_readwrite("intra_op_num_threads", + &OrtBackendOption::intra_op_num_threads) + .def_readwrite("inter_op_num_threads", + &OrtBackendOption::inter_op_num_threads) + .def_readwrite("execution_mode", &OrtBackendOption::execution_mode) + .def_readwrite("device", &OrtBackendOption::device) + .def_readwrite("device_id", &OrtBackendOption::device_id) + .def_readwrite("enable_fp16", &OrtBackendOption::enable_fp16) + .def("disable_ort_fp16_op_types", + &OrtBackendOption::DisableOrtFP16OpTypes); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.cc new file mode 100755 index 0000000000..c6d48b1970 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.cc @@ -0,0 +1,455 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/runtime/backends/ort/ort_backend.h" + +#include "ultrainfer/core/float16.h" +#include "ultrainfer/runtime/backends/ort/ops/adaptive_pool2d.h" +#include "ultrainfer/runtime/backends/ort/ops/multiclass_nms.h" +#include "ultrainfer/runtime/backends/ort/utils.h" +#include "ultrainfer/utils/utils.h" +#ifdef ENABLE_PADDLE2ONNX +#include "paddle2onnx/converter.h" +#endif + +#include + +namespace ultrainfer { + +std::vector OrtBackend::custom_operators_ = + std::vector(); + +bool OrtBackend::BuildOption(const OrtBackendOption &option) { + option_ = option; + if (option.graph_optimization_level >= 0) { + session_options_.SetGraphOptimizationLevel( + GraphOptimizationLevel(option.graph_optimization_level)); + } + if (option.intra_op_num_threads > 0) { + session_options_.SetIntraOpNumThreads(option.intra_op_num_threads); + } + if (option.inter_op_num_threads > 0) { + session_options_.SetInterOpNumThreads(option.inter_op_num_threads); + } + if (option.execution_mode >= 0) { + session_options_.SetExecutionMode(ExecutionMode(option.execution_mode)); + } + +#ifdef WITH_DIRECTML + // If use DirectML + if (option.device == Device::DIRECTML) { + auto all_providers = Ort::GetAvailableProviders(); + bool support_dml = false; + std::string providers_msg = ""; + for (size_t i = 0; i < all_providers.size(); ++i) { + providers_msg = providers_msg + all_providers[i] + ", "; + if (all_providers[i] == "DmlExecutionProvider") { + support_dml = true; + } + } + + if (!support_dml) { + FDWARNING << "Compiled ultrainfer with onnxruntime doesn't " + "support DirectML, the available providers are " + << providers_msg << "will fallback to CPUExecutionProvider." + << "Please check if DirectML is installed successfully." 
+ << std::endl; + option_.device = Device::CPU; + } else { + // Must set as below when use dml. + session_options_.DisableMemPattern(); + session_options_.SetExecutionMode(ExecutionMode(0)); + + // DML session_option + OrtApi const &ortApi = Ort::GetApi(); + const OrtDmlApi *ortDmlApi; + ortApi.GetExecutionProviderApi( + "DML", ORT_API_VERSION, reinterpret_cast(&ortDmlApi)); + OrtStatus *onnx_dml_status = + ortDmlApi->SessionOptionsAppendExecutionProvider_DML(session_options_, + 0); + if (onnx_dml_status != nullptr) { + FDERROR + << "DirectML is not support in your machine, the program will exit." + << std::endl; + ortApi.ReleaseStatus(onnx_dml_status); + return false; + } + } + return true; + } +#endif + + // CUDA + if (option.device == Device::GPU) { + auto all_providers = Ort::GetAvailableProviders(); + bool support_cuda = false; + std::string providers_msg = ""; + for (size_t i = 0; i < all_providers.size(); ++i) { + providers_msg = providers_msg + all_providers[i] + ", "; + if (all_providers[i] == "CUDAExecutionProvider") { + support_cuda = true; + } + } + if (!support_cuda) { + FDWARNING << "Compiled ultrainfer with onnxruntime doesn't " + "support GPU, the available providers are " + << providers_msg << "will fallback to CPUExecutionProvider." + << std::endl; + option_.device = Device::CPU; + } else { + OrtCUDAProviderOptions cuda_options; + cuda_options.device_id = option.device_id; + if (option.external_stream_) { + cuda_options.has_user_compute_stream = 1; + cuda_options.user_compute_stream = option.external_stream_; + } + session_options_.AppendExecutionProvider_CUDA(cuda_options); + } + return true; + } + return true; +} + +bool OrtBackend::Init(const RuntimeOption &option) { + if (option.device != Device::CPU && option.device != Device::GPU && + option.device != Device::DIRECTML) { + FDERROR + << "Backend::ORT only supports Device::CPU/Device::GPU, but now its " + << option.device << "." << std::endl; + return false; + } + OrtBackendOption ort_option = option.ort_option; + ort_option.device = option.device; + ort_option.device_id = option.device_id; + ort_option.external_stream_ = option.external_stream_; + + if (option.model_format == ModelFormat::PADDLE) { + if (option.model_from_memory_) { + return InitFromPaddle(option.model_file, option.params_file, ort_option); + } + std::string model_buffer, params_buffer; + FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), + "Failed to read model file."); + FDASSERT(ReadBinaryFromFile(option.params_file, ¶ms_buffer), + "Failed to read parameters file."); + return InitFromPaddle(model_buffer, params_buffer, ort_option); + } else if (option.model_format == ModelFormat::ONNX) { + if (option.model_from_memory_) { + return InitFromOnnx(option.model_file, ort_option); + } + std::string model_buffer; + FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), + "Failed to read model file."); + return InitFromOnnx(model_buffer, ort_option); + } else { + FDERROR << "Only support Paddle/ONNX model format for OrtBackend." + << std::endl; + return false; + } + return false; +} + +bool OrtBackend::InitFromPaddle(const std::string &model_buffer, + const std::string ¶ms_buffer, + const OrtBackendOption &option, bool verbose) { + if (initialized_) { + FDERROR << "OrtBackend is already initlized, cannot initialize again." 
+ << std::endl; + return false; + } + char *model_content_ptr; + int model_content_size = 0; + bool save_external = false; +#ifdef ENABLE_PADDLE2ONNX + std::vector ops; + ops.resize(2); + strcpy(ops[0].op_name, "multiclass_nms3"); + strcpy(ops[0].export_op_name, "MultiClassNMS"); + strcpy(ops[1].op_name, "pool2d"); + strcpy(ops[1].export_op_name, "AdaptivePool2d"); + converted_to_fp16 = option.enable_fp16; + + std::vector disable_fp16_ops; + for (auto i = 0; i < option.ort_disabled_ops_.size(); i++) { + auto one_type = option.ort_disabled_ops_[i]; + char *charStr = new char[one_type.size() + 1]; + std::strcpy(charStr, one_type.c_str()); + disable_fp16_ops.push_back(charStr); + } + if (!paddle2onnx::Export( + model_buffer.c_str(), model_buffer.size(), params_buffer.c_str(), + params_buffer.size(), &model_content_ptr, &model_content_size, 11, + true, verbose, true, true, true, ops.data(), 2, "onnxruntime", + nullptr, 0, "", &save_external, option.enable_fp16, + disable_fp16_ops.data(), option.ort_disabled_ops_.size())) { + FDERROR << "Error occured while export PaddlePaddle to ONNX format." + << std::endl; + return false; + } + + std::string onnx_model_proto(model_content_ptr, + model_content_ptr + model_content_size); + delete[] model_content_ptr; + model_content_ptr = nullptr; + if (save_external) { + model_file_name = "model.onnx"; + std::fstream f(model_file_name, std::ios::out); + FDASSERT(f.is_open(), "Can not open file: %s to save model.", + model_file_name.c_str()); + f << onnx_model_proto; + f.close(); + } + return InitFromOnnx(onnx_model_proto, option); +#else + FDERROR << "Didn't compile with PaddlePaddle Frontend, you can try to " + "call `InitFromOnnx` instead." + << std::endl; +#endif + return false; +} + +bool OrtBackend::InitFromOnnx(const std::string &model_file, + const OrtBackendOption &option) { + if (initialized_) { + FDERROR << "OrtBackend is already initlized, cannot initialize again." + << std::endl; + return false; + } + std::string onnx_model_buffer; + if (!converted_to_fp16 && option.enable_fp16) { + if (option.device == Device::CPU) { + FDWARNING << "Turning on FP16 on CPU may result in slower inference." + << std::endl; + } + char *model_content_ptr; + int model_content_size = 0; + paddle2onnx::ConvertFP32ToFP16(model_file.c_str(), model_file.size(), + &model_content_ptr, &model_content_size); + std::string onnx_model_proto(model_content_ptr, + model_content_ptr + model_content_size); + onnx_model_buffer = onnx_model_proto; + } else { + onnx_model_buffer = model_file; + } + + if (!BuildOption(option)) { + FDERROR << "Create Ort option fail." 
<< std::endl; + return false; + } + + InitCustomOperators(); + if (model_file_name.size()) { +#ifdef WIN32 + std::wstring widestr = + std::wstring(model_file_name.begin(), model_file_name.end()); + session_ = {env_, widestr.c_str(), session_options_}; +#else + session_ = {env_, model_file_name.c_str(), session_options_}; +#endif + } else { + session_ = {env_, onnx_model_buffer.data(), onnx_model_buffer.size(), + session_options_}; + } + + binding_ = std::make_shared(session_); + + Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault); + Ort::Allocator allocator(session_, memory_info); + size_t n_inputs = session_.GetInputCount(); + for (size_t i = 0; i < n_inputs; ++i) { + auto input_name_ptr = session_.GetInputNameAllocated(i, allocator); + auto type_info = session_.GetInputTypeInfo(i); + std::vector shape = + type_info.GetTensorTypeAndShapeInfo().GetShape(); + ONNXTensorElementDataType data_type = + type_info.GetTensorTypeAndShapeInfo().GetElementType(); + inputs_desc_.emplace_back( + OrtValueInfo{input_name_ptr.get(), shape, data_type}); + } + + size_t n_outputs = session_.GetOutputCount(); + for (size_t i = 0; i < n_outputs; ++i) { + auto output_name_ptr = session_.GetOutputNameAllocated(i, allocator); + auto type_info = session_.GetOutputTypeInfo(i); + std::vector shape = + type_info.GetTensorTypeAndShapeInfo().GetShape(); + ONNXTensorElementDataType data_type = + type_info.GetTensorTypeAndShapeInfo().GetElementType(); + outputs_desc_.emplace_back( + OrtValueInfo{output_name_ptr.get(), shape, data_type}); + + Ort::MemoryInfo out_memory_info("Cpu", OrtDeviceAllocator, 0, + OrtMemTypeDefault); + binding_->BindOutput(output_name_ptr.get(), out_memory_info); + } + initialized_ = true; + return true; +} + +void OrtBackend::OrtValueToFDTensor(const Ort::Value &value, FDTensor *tensor, + const std::string &name, bool copy_to_fd) { + const auto info = value.GetTensorTypeAndShapeInfo(); + const auto data_type = info.GetElementType(); + size_t numel = info.GetElementCount(); + auto shape = info.GetShape(); + FDDataType dtype; + + if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) { + dtype = FDDataType::FP32; + numel *= sizeof(float); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) { + dtype = FDDataType::INT32; + numel *= sizeof(int32_t); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) { + dtype = FDDataType::INT64; + numel *= sizeof(int64_t); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) { + dtype = FDDataType::FP64; + numel *= sizeof(double); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) { + dtype = FDDataType::FP16; + numel *= sizeof(float16); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) { + dtype = FDDataType::UINT8; + numel *= sizeof(uint8_t); + } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8) { + dtype = FDDataType::INT8; + numel *= sizeof(int8_t); + } else { + FDASSERT( + false, + "Unrecognized data type of %d while calling OrtBackend::CopyToCpu().", + data_type); + } + const void *value_ptr = value.GetTensorData(); + if (copy_to_fd) { + tensor->Resize(shape, dtype, name); + memcpy(tensor->MutableData(), value_ptr, numel); + } else { + tensor->name = name; + tensor->SetExternalData(shape, dtype, const_cast(value_ptr), + Device::CPU); + } +} + +bool OrtBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + if (inputs.size() != inputs_desc_.size()) { + FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size() + << ") should keep same with 
the inputs of this model(" + << inputs_desc_.size() << ")." << std::endl; + return false; + } + + // from FDTensor to Ort Inputs + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + for (size_t i = 0; i < inputs.size(); ++i) { + auto ort_value = CreateOrtValue(inputs[i], option_.device == Device::GPU); + binding_->BindInput(inputs[i].name.c_str(), ort_value); + } + + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, + OrtMemTypeDefault); + binding_->BindOutput(outputs_desc_[i].name.c_str(), memory_info); + } + + // Inference with inputs + RUNTIME_PROFILE_LOOP_BEGIN(1) + try { + session_.Run({}, *(binding_.get())); + } catch (const std::exception &e) { + FDERROR << "Failed to Infer: " << e.what() << std::endl; + return false; + } + RUNTIME_PROFILE_LOOP_END + + // Convert result after inference + std::vector ort_outputs = binding_->GetOutputValues(); + outputs->resize(ort_outputs.size()); + for (size_t i = 0; i < ort_outputs.size(); ++i) { + OrtValueToFDTensor(ort_outputs[i], &((*outputs)[i]), outputs_desc_[i].name, + copy_to_fd); + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + return true; +} + +TensorInfo OrtBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()); + TensorInfo info; + info.name = inputs_desc_[index].name; + info.shape.assign(inputs_desc_[index].shape.begin(), + inputs_desc_[index].shape.end()); + info.dtype = GetFdDtype(inputs_desc_[index].dtype); + return info; +} + +std::vector OrtBackend::GetInputInfos() { + auto size = inputs_desc_.size(); + std::vector infos; + infos.reserve(size); + for (auto i = 0; i < size; i++) { + infos.emplace_back(GetInputInfo(i)); + } + return infos; +} + +TensorInfo OrtBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs: %d.", index, + NumOutputs()); + TensorInfo info; + info.name = outputs_desc_[index].name; + info.shape.assign(outputs_desc_[index].shape.begin(), + outputs_desc_[index].shape.end()); + info.dtype = GetFdDtype(outputs_desc_[index].dtype); + return info; +} + +std::vector OrtBackend::GetOutputInfos() { + std::vector infos; + for (auto i = 0; i < outputs_desc_.size(); i++) { + infos.emplace_back(GetOutputInfo(i)); + } + return infos; +} + +void OrtBackend::InitCustomOperators() { +#ifndef NON_64_PLATFORM + if (custom_operators_.size() == 0) { + MultiClassNmsOp *multiclass_nms = new MultiClassNmsOp{}; + custom_operators_.push_back(multiclass_nms); + if (option_.device == Device::GPU) { + AdaptivePool2dOp *adaptive_pool2d = + new AdaptivePool2dOp{"CUDAExecutionProvider"}; + custom_operators_.push_back(adaptive_pool2d); + } else { + AdaptivePool2dOp *adaptive_pool2d = + new AdaptivePool2dOp{"CPUExecutionProvider"}; + custom_operators_.push_back(adaptive_pool2d); + } + } + for (size_t i = 0; i < custom_operators_.size(); ++i) { + custom_op_domain_.Add(custom_operators_[i]); + } + session_options_.Add(custom_op_domain_); +#endif +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.h new file mode 100755 index 0000000000..058f30e5ed --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/ort_backend.h @@ -0,0 +1,91 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include "onnxruntime_cxx_api.h" // NOLINT +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/ort/option.h" + +#ifdef WITH_DIRECTML +#include "dml_provider_factory.h" // NOLINT +#endif + +namespace ultrainfer { + +struct OrtValueInfo { + std::string name; + std::vector shape; + ONNXTensorElementDataType dtype; +}; + +class OrtBackend : public BaseBackend { +public: + OrtBackend() {} + virtual ~OrtBackend() = default; + + bool BuildOption(const OrtBackendOption &option); + + bool Init(const RuntimeOption &option); + + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + + int NumInputs() const override { return inputs_desc_.size(); } + + int NumOutputs() const override { return outputs_desc_.size(); } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + static std::vector custom_operators_; + void InitCustomOperators(); + +private: + bool InitFromPaddle(const std::string &model_buffer, + const std::string ¶ms_buffer, + const OrtBackendOption &option = OrtBackendOption(), + bool verbose = false); + + bool InitFromOnnx(const std::string &model_buffer, + const OrtBackendOption &option = OrtBackendOption()); + + Ort::Env env_; + Ort::Session session_{nullptr}; + Ort::SessionOptions session_options_; + std::shared_ptr binding_; + std::vector inputs_desc_; + std::vector outputs_desc_; + + // the ONNX model file name, + // when ONNX is bigger than 2G, we will set this name + std::string model_file_name; + // recored if the model has been converted to fp16 + bool converted_to_fp16 = false; + +#ifndef NON_64_PLATFORM + Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle"); +#endif + OrtBackendOption option_; + void OrtValueToFDTensor(const Ort::Value &value, FDTensor *tensor, + const std::string &name, bool copy_to_fd); +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.cc b/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.cc new file mode 100755 index 0000000000..2892e449dc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
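As a rough usage sketch (not part of the patch), the OrtBackend declared above is driven through Init() with a RuntimeOption and then Infer() on FDTensors. The option fields used below are the ones read in OrtBackend::Init(); the model path is a placeholder:

#include "ultrainfer/runtime/backends/ort/ort_backend.h"

bool RunOnnxModelOnce(std::vector<ultrainfer::FDTensor>& inputs,
                      std::vector<ultrainfer::FDTensor>* outputs) {
  ultrainfer::RuntimeOption option;
  option.model_file = "model.onnx";                     // placeholder path
  option.model_format = ultrainfer::ModelFormat::ONNX;  // PADDLE is also accepted
  option.device = ultrainfer::Device::CPU;

  ultrainfer::OrtBackend backend;
  if (!backend.Init(option)) {
    return false;  // Init() already reports the reason via FDERROR
  }
  // copy_to_fd=true copies results back into CPU-resident FDTensors.
  return backend.Infer(inputs, outputs, /*copy_to_fd=*/true);
}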
+ +#include "ultrainfer/runtime/backends/ort/utils.h" + +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +ONNXTensorElementDataType GetOrtDtype(const FDDataType &fd_dtype) { + if (fd_dtype == FDDataType::FP32) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + } else if (fd_dtype == FDDataType::FP64) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; + } else if (fd_dtype == FDDataType::INT32) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; + } else if (fd_dtype == FDDataType::INT64) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; + } else if (fd_dtype == FDDataType::UINT8) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8; + } else if (fd_dtype == FDDataType::INT8) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8; + } else if (fd_dtype == FDDataType::FP16) { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; + } + FDERROR << "Unrecognized fastdeply data type:" << Str(fd_dtype) << "." + << std::endl; + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; +} + +FDDataType GetFdDtype(const ONNXTensorElementDataType &ort_dtype) { + if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) { + return FDDataType::FP32; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) { + return FDDataType::FP64; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) { + return FDDataType::INT32; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) { + return FDDataType::INT64; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) { + return FDDataType::FP16; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) { + return FDDataType::UINT8; + } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8) { + return FDDataType::INT8; + } + FDERROR << "Unrecognized ort data type:" << ort_dtype << "." << std::endl; + return FDDataType::FP32; +} + +Ort::Value CreateOrtValue(FDTensor &tensor, bool is_backend_cuda) { + FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU, + "Only support tensor which device is CPU or GPU for OrtBackend."); + if (tensor.device == Device::GPU && is_backend_cuda) { + Ort::MemoryInfo memory_info("Cuda", OrtDeviceAllocator, 0, + OrtMemTypeDefault); + auto ort_value = Ort::Value::CreateTensor( + memory_info, tensor.MutableData(), tensor.Nbytes(), tensor.shape.data(), + tensor.shape.size(), GetOrtDtype(tensor.dtype)); + return ort_value; + } + Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault); + auto ort_value = Ort::Value::CreateTensor( + memory_info, tensor.Data(), tensor.Nbytes(), tensor.shape.data(), + tensor.shape.size(), GetOrtDtype(tensor.dtype)); + return ort_value; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.h b/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.h new file mode 100755 index 0000000000..9cce53b57a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/ort/utils.h @@ -0,0 +1,39 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
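A short sketch of how the helpers defined above are typically combined when wrapping an FDTensor for ONNX Runtime; CreateOrtValue shares the tensor's buffer rather than copying it (a CPU tensor is assumed here):

#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/runtime/backends/ort/utils.h"

Ort::Value WrapCpuTensor(ultrainfer::FDTensor& tensor) {
  // The FD dtype maps onto the ONNX element type via GetOrtDtype().
  ONNXTensorElementDataType ort_dtype = ultrainfer::GetOrtDtype(tensor.dtype);
  (void)ort_dtype;  // shown only to illustrate the mapping
  // Zero-copy wrap: the returned Ort::Value points at the tensor's CPU buffer.
  return ultrainfer::CreateOrtValue(tensor, /*is_backend_cuda=*/false);
}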
+ +#pragma once + +#include +#include +#include +#include + +#include "onnxruntime_cxx_api.h" // NOLINT +#include "ultrainfer/runtime/backends/backend.h" + +namespace ultrainfer { + +// Convert FDDataType to OrtDataType +ONNXTensorElementDataType GetOrtDtype(const FDDataType &fd_dtype); + +// Convert OrtDataType to FDDataType +FDDataType GetFdDtype(const ONNXTensorElementDataType &ort_dtype); + +// Create Ort::Value +// is_backend_cuda specify if the onnxruntime use CUDAExectionProvider +// While is_backend_cuda = true, and tensor.device = Device::GPU +// Will directly share the cuda data in tensor to OrtValue +Ort::Value CreateOrtValue(FDTensor &tensor, bool is_backend_cuda = false); + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cc new file mode 100755 index 0000000000..baaa72ed33 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cc @@ -0,0 +1,124 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if defined(WITH_GPU) + +#include +#include + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +std::vector +postprocess_gpu(const std::vector &hm, + const std::vector ®, + const std::vector &height, + const std::vector &dim, + const std::vector &vel, + const std::vector &rot, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const std::vector &post_center_range, + const std::vector &num_classes, const int down_ratio, + const float score_threshold, const float nms_iou_threshold, + const int nms_pre_max_size, const int nms_post_max_size, + const bool with_velocity); + +std::vector +centerpoint_postprocess(const std::vector &hm, + const std::vector ®, + const std::vector &height, + const std::vector &dim, + const std::vector &vel, + const std::vector &rot, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const std::vector &post_center_range, + const std::vector &num_classes, + const int down_ratio, const float score_threshold, + const float nms_iou_threshold, + const int nms_pre_max_size, const int nms_post_max_size, + const bool with_velocity) { + if (hm[0].is_gpu()) { + return postprocess_gpu(hm, reg, height, dim, vel, rot, voxel_size, + point_cloud_range, post_center_range, num_classes, + down_ratio, score_threshold, nms_iou_threshold, + nms_pre_max_size, nms_post_max_size, with_velocity); + } else { + PD_THROW("Unsupported device type for centerpoint postprocess " + "operator."); + } +} + +std::vector> +PostProcessInferShape(const std::vector> &hm_shape, + const std::vector> ®_shape, + const std::vector> 
&height_shape, + const std::vector> &dim_shape, + const std::vector> &vel_shape, + const std::vector> &rot_shape, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const std::vector &post_center_range, + const std::vector &num_classes, const int down_ratio, + const float score_threshold, + const float nms_iou_threshold, const int nms_pre_max_size, + const int nms_post_max_size, const bool with_velocity) { + if (with_velocity) { + return {{-1, 9}, {-1}, {-1}}; + } else { + return {{-1, 7}, {-1}, {-1}}; + } +} + +std::vector +PostProcessInferDtype(const std::vector &hm_dtype, + const std::vector ®_dtype, + const std::vector &height_dtype, + const std::vector &dim_dtype, + const std::vector &vel_dtype, + const std::vector &rot_dtype) { + return {reg_dtype[0], hm_dtype[0], paddle::DataType::INT64}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer + +PD_BUILD_OP(centerpoint_postprocess) + .Inputs({paddle::Vec("HM"), paddle::Vec("REG"), paddle::Vec("HEIGHT"), + paddle::Vec("DIM"), paddle::Vec("VEL"), paddle::Vec("ROT")}) + .Outputs({"BBOXES", "SCORES", "LABELS"}) + .SetKernelFn( + PD_KERNEL(ultrainfer::paddle_custom_ops::centerpoint_postprocess)) + .Attrs({"voxel_size: std::vector", + "point_cloud_range: std::vector", + "post_center_range: std::vector", + "num_classes: std::vector", "down_ratio: int", + "score_threshold: float", "nms_iou_threshold: float", + "nms_pre_max_size: int", "nms_post_max_size: int", + "with_velocity: bool"}) + .SetInferShapeFn( + PD_INFER_SHAPE(ultrainfer::paddle_custom_ops::PostProcessInferShape)) + .SetInferDtypeFn( + PD_INFER_DTYPE(ultrainfer::paddle_custom_ops::PostProcessInferDtype)); + +#endif // WITH_GPU diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cu b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cu new file mode 100755 index 0000000000..de92cbf9b7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/centerpoint_postprocess_op.cu @@ -0,0 +1,295 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
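The shape and dtype functions registered above fix the output contract of the op: BBOXES carries 9 values per box (x, y, z, three box dims, vx, vy, yaw) when velocity is predicted and 7 otherwise, while SCORES and LABELS hold one value per kept box. A trivial host-side restatement of that convention, for reference only:

#include <cstdint>
#include <vector>

// Mirrors PostProcessInferShape: {-1, 9|7} boxes, {-1} scores, {-1} labels.
std::vector<std::vector<int64_t>> CenterPointOutShapes(bool with_velocity) {
  const int64_t box_width = with_velocity ? 9 : 7;
  return {{-1, box_width}, {-1}, {-1}};
}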
+ +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +#define CHECK_INPUT_CUDA(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") + +#define CHECK_INPUT_BATCHSIZE(x) \ + PD_CHECK(x.shape()[0] == 1, #x " batch size must be 1.") + +// #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) +__host__ __device__ static inline int DIVUP(const int m, const int n) { + return ((m) / (n) + ((m) % (n) > 0)); +} + +static const int THREADS_PER_BLOCK_NMS = sizeof(int64_t) * 8; + +void NmsLauncher(const cudaStream_t &stream, const float *bboxes, + const int *index, const int64_t *sorted_index, + const int num_bboxes, const int num_bboxes_for_nms, + const float nms_overlap_thresh, const int decode_bboxes_dims, + int64_t *mask); + +__global__ void decode_kernel( + const float *score, const float *reg, const float *height, const float *dim, + const float *vel, const float *rot, const float score_threshold, + const int feat_w, const float down_ratio, const float voxel_size_x, + const float voxel_size_y, const float point_cloud_range_x_min, + const float point_cloud_range_y_min, const float post_center_range_x_min, + const float post_center_range_y_min, const float post_center_range_z_min, + const float post_center_range_x_max, const float post_center_range_y_max, + const float post_center_range_z_max, const int num_bboxes, + const bool with_velocity, const int decode_bboxes_dims, float *bboxes, + bool *mask, int *score_idx) { + int box_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (box_idx == num_bboxes || box_idx > num_bboxes) { + return; + } + const int xs = box_idx % feat_w; + const int ys = box_idx / feat_w; + + float x = reg[box_idx]; + float y = reg[box_idx + num_bboxes]; + float z = height[box_idx]; + + bboxes[box_idx * decode_bboxes_dims] = + (x + xs) * down_ratio * voxel_size_x + point_cloud_range_x_min; + bboxes[box_idx * decode_bboxes_dims + 1] = + (y + ys) * down_ratio * voxel_size_y + point_cloud_range_y_min; + bboxes[box_idx * decode_bboxes_dims + 2] = z; + bboxes[box_idx * decode_bboxes_dims + 3] = dim[box_idx]; + bboxes[box_idx * decode_bboxes_dims + 4] = dim[box_idx + num_bboxes]; + bboxes[box_idx * decode_bboxes_dims + 5] = dim[box_idx + 2 * num_bboxes]; + if (with_velocity) { + bboxes[box_idx * decode_bboxes_dims + 6] = vel[box_idx]; + bboxes[box_idx * decode_bboxes_dims + 7] = vel[box_idx + num_bboxes]; + bboxes[box_idx * decode_bboxes_dims + 8] = + atan2f(rot[box_idx], rot[box_idx + num_bboxes]); + } else { + bboxes[box_idx * decode_bboxes_dims + 6] = + atan2f(rot[box_idx], rot[box_idx + num_bboxes]); + } + + if (score[box_idx] > score_threshold && x <= post_center_range_x_max && + y <= post_center_range_y_max && z <= post_center_range_z_max && + x >= post_center_range_x_min && y >= post_center_range_y_min && + z >= post_center_range_z_min) { + mask[box_idx] = true; + } + + score_idx[box_idx] = box_idx; +} + +void DecodeLauncher( + const cudaStream_t &stream, const float *score, const float *reg, + const float *height, const float *dim, const float *vel, const float *rot, + const float score_threshold, const int feat_w, const float down_ratio, + const float voxel_size_x, const float voxel_size_y, + const float point_cloud_range_x_min, const float point_cloud_range_y_min, + const float post_center_range_x_min, const float 
post_center_range_y_min, + const float post_center_range_z_min, const float post_center_range_x_max, + const float post_center_range_y_max, const float post_center_range_z_max, + const int num_bboxes, const bool with_velocity, + const int decode_bboxes_dims, float *bboxes, bool *mask, int *score_idx) { + dim3 blocks(DIVUP(num_bboxes, THREADS_PER_BLOCK_NMS)); + dim3 threads(THREADS_PER_BLOCK_NMS); + decode_kernel<<>>( + score, reg, height, dim, vel, rot, score_threshold, feat_w, down_ratio, + voxel_size_x, voxel_size_y, point_cloud_range_x_min, + point_cloud_range_y_min, post_center_range_x_min, post_center_range_y_min, + post_center_range_z_min, post_center_range_x_max, post_center_range_y_max, + post_center_range_z_max, num_bboxes, with_velocity, decode_bboxes_dims, + bboxes, mask, score_idx); +} + +std::vector +postprocess_gpu(const std::vector &hm, + const std::vector ®, + const std::vector &height, + const std::vector &dim, + const std::vector &vel, + const std::vector &rot, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const std::vector &post_center_range, + const std::vector &num_classes, const int down_ratio, + const float score_threshold, const float nms_iou_threshold, + const int nms_pre_max_size, const int nms_post_max_size, + const bool with_velocity) { + int num_tasks = hm.size(); + int decode_bboxes_dims = 9; + if (!with_velocity) { + decode_bboxes_dims = 7; + } + float voxel_size_x = voxel_size[0]; + float voxel_size_y = voxel_size[1]; + float point_cloud_range_x_min = point_cloud_range[0]; + float point_cloud_range_y_min = point_cloud_range[1]; + + float post_center_range_x_min = post_center_range[0]; + float post_center_range_y_min = post_center_range[1]; + float post_center_range_z_min = post_center_range[2]; + float post_center_range_x_max = post_center_range[3]; + float post_center_range_y_max = post_center_range[4]; + float post_center_range_z_max = post_center_range[5]; + std::vector scores; + std::vector labels; + std::vector bboxes; + for (int task_id = 0; task_id < num_tasks; ++task_id) { + CHECK_INPUT_BATCHSIZE(hm[0]); + + int feat_h = hm[0].shape()[2]; + int feat_w = hm[0].shape()[3]; + int num_bboxes = feat_h * feat_w; + + // score and label + auto sigmoid_hm_per_task = paddle::experimental::sigmoid(hm[task_id]); + auto label_per_task = + paddle::experimental::argmax(sigmoid_hm_per_task, 1, true, false); + auto score_per_task = + paddle::experimental::max(sigmoid_hm_per_task, {1}, true); + // dim + auto exp_dim_per_task = paddle::experimental::exp(dim[task_id]); + + // decode bboxed and get mask of bboxes for nms + const float *score_ptr = score_per_task.data(); + const float *reg_ptr = reg[task_id].data(); + const float *height_ptr = height[task_id].data(); + // const float* dim_ptr = dim[task_id].data(); + const float *exp_dim_per_task_ptr = exp_dim_per_task.data(); + const float *vel_ptr = vel[task_id].data(); + const float *rot_ptr = rot[task_id].data(); + auto decode_bboxes = + paddle::empty({num_bboxes, decode_bboxes_dims}, + paddle::DataType::FLOAT32, paddle::GPUPlace()); + float *decode_bboxes_ptr = decode_bboxes.data(); + auto thresh_mask = paddle::full({num_bboxes}, 0, paddle::DataType::BOOL, + paddle::GPUPlace()); + bool *thresh_mask_ptr = thresh_mask.data(); + auto score_idx = paddle::empty({num_bboxes}, paddle::DataType::INT32, + paddle::GPUPlace()); + int *score_idx_ptr = score_idx.data(); + + DecodeLauncher(score_per_task.stream(), score_ptr, reg_ptr, height_ptr, + exp_dim_per_task_ptr, vel_ptr, rot_ptr, 
score_threshold, + feat_w, down_ratio, voxel_size_x, voxel_size_y, + point_cloud_range_x_min, point_cloud_range_y_min, + post_center_range_x_min, post_center_range_y_min, + post_center_range_z_min, post_center_range_x_max, + post_center_range_y_max, post_center_range_z_max, num_bboxes, + with_velocity, decode_bboxes_dims, decode_bboxes_ptr, + thresh_mask_ptr, score_idx_ptr); + + // select score by mask + auto selected_score_idx = + paddle::experimental::masked_select(score_idx, thresh_mask); + auto flattened_selected_score = + paddle::experimental::reshape(score_per_task, {num_bboxes}); + auto selected_score = paddle::experimental::masked_select( + flattened_selected_score, thresh_mask); + int num_selected = selected_score.numel(); + if (num_selected == 0 || num_selected < 0) { + auto fake_out_boxes = + paddle::full({1, decode_bboxes_dims}, 0., paddle::DataType::FLOAT32, + paddle::GPUPlace()); + auto fake_out_score = + paddle::full({1}, -1., paddle::DataType::FLOAT32, paddle::GPUPlace()); + auto fake_out_label = + paddle::full({1}, 0, paddle::DataType::INT64, paddle::GPUPlace()); + scores.push_back(fake_out_score); + labels.push_back(fake_out_label); + bboxes.push_back(fake_out_boxes); + continue; + } + + // sort score by descending + auto sort_out = paddle::experimental::argsort(selected_score, 0, true); + auto sorted_index = std::get<1>(sort_out); + int num_bboxes_for_nms = + num_selected > nms_pre_max_size ? nms_pre_max_size : num_selected; + + // nms + // in NmsLauncher, rot = - theta - pi / 2 + int col_blocks = DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS); + auto nms_mask = paddle::empty({num_bboxes_for_nms * col_blocks}, + paddle::DataType::INT64, paddle::GPUPlace()); + int64_t *nms_mask_data = nms_mask.data(); + + NmsLauncher(score_per_task.stream(), decode_bboxes.data(), + selected_score_idx.data(), sorted_index.data(), + num_selected, num_bboxes_for_nms, nms_iou_threshold, + decode_bboxes_dims, nms_mask_data); + + const paddle::Tensor nms_mask_cpu_tensor = + nms_mask.copy_to(paddle::CPUPlace(), true); + const int64_t *nms_mask_cpu = nms_mask_cpu_tensor.data(); + + auto remv_cpu = paddle::full({col_blocks}, 0, paddle::DataType::INT64, + paddle::CPUPlace()); + int64_t *remv_cpu_data = remv_cpu.data(); + int num_to_keep = 0; + auto keep = paddle::empty({num_bboxes_for_nms}, paddle::DataType::INT32, + paddle::CPUPlace()); + int *keep_data = keep.data(); + + for (int i = 0; i < num_bboxes_for_nms; i++) { + int nblock = i / THREADS_PER_BLOCK_NMS; + int inblock = i % THREADS_PER_BLOCK_NMS; + + if (!(remv_cpu_data[nblock] & (1ULL << inblock))) { + keep_data[num_to_keep++] = i; + const int64_t *p = &nms_mask_cpu[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv_cpu_data[j] |= p[j]; + } + } + } + + int num_for_gather = + num_to_keep > nms_post_max_size ? 
nms_post_max_size : num_to_keep; + auto keep_gpu = paddle::empty({num_for_gather}, paddle::DataType::INT32, + paddle::GPUPlace()); + int *keep_gpu_ptr = keep_gpu.data(); + cudaMemcpy(keep_gpu_ptr, keep_data, num_for_gather * sizeof(int), + cudaMemcpyHostToDevice); + + auto gather_sorted_index = + paddle::experimental::gather(sorted_index, keep_gpu, 0); + auto gather_index = paddle::experimental::gather(selected_score_idx, + gather_sorted_index, 0); + + auto gather_score = + paddle::experimental::gather(selected_score, gather_sorted_index, 0); + auto flattened_label = + paddle::experimental::reshape(label_per_task, {num_bboxes}); + auto gather_label = + paddle::experimental::gather(flattened_label, gather_index, 0); + auto gather_bbox = + paddle::experimental::gather(decode_bboxes, gather_index, 0); + auto start_label = paddle::full( + {1}, num_classes[task_id], paddle::DataType::INT64, paddle::GPUPlace()); + auto added_label = paddle::experimental::add(gather_label, start_label); + scores.push_back(gather_score); + labels.push_back(added_label); + bboxes.push_back(gather_bbox); + } + + auto out_scores = paddle::experimental::concat(scores, 0); + auto out_labels = paddle::experimental::concat(labels, 0); + auto out_bboxes = paddle::experimental::concat(bboxes, 0); + return {out_bboxes, out_scores, out_labels}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cc new file mode 100755 index 0000000000..f8784fdb7a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cc @@ -0,0 +1,100 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
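The keep-loop above walks a per-box suppression bitmask produced by NmsLauncher: row i of nms_mask records, in 64-bit words, which boxes box i overlaps beyond the IoU threshold, and a box survives only if no previously kept box has marked it. A CPU-only sketch of that bookkeeping (unsigned words are used here for clarity):

#include <cstdint>
#include <vector>

std::vector<int> CollectKept(const std::vector<uint64_t>& nms_mask,
                             int num_boxes, int col_blocks) {
  std::vector<uint64_t> removed(col_blocks, 0);  // per-64-box suppression words
  std::vector<int> keep;
  for (int i = 0; i < num_boxes; ++i) {
    const int nblock = i / 64;
    const int inblock = i % 64;
    if (removed[nblock] & (1ULL << inblock)) continue;  // already suppressed
    keep.push_back(i);
    const uint64_t* row = nms_mask.data() + static_cast<size_t>(i) * col_blocks;
    for (int j = nblock; j < col_blocks; ++j) removed[j] |= row[j];
  }
  return keep;
}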
+ +#if defined(WITH_GPU) + +#include "grid_sample_3d.h" + +#include + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +std::vector +GridSample3DCUDAForward(const paddle::Tensor &x, const paddle::Tensor &grid, + const std::string &mode, + const std::string &padding_mode, bool align_corners); + +std::vector GridSample3DForward(const paddle::Tensor &x, + const paddle::Tensor &grid, + const std::string &mode, + const std::string &padding_mode, + bool align_corners) { + return GridSample3DCUDAForward(x, grid, mode, padding_mode, align_corners); +} + +std::vector +GridSample3DCUDABackward(const paddle::Tensor &x, const paddle::Tensor &grid, + const paddle::Tensor &grad_out, + const std::string &mode, + const std::string &padding_mode, bool align_corners); + +std::vector +GridSample3DBackward(const paddle::Tensor &x, const paddle::Tensor &grid, + const paddle::Tensor &grad_out, const std::string &mode, + const std::string &padding_mode, bool align_corners) { + return GridSample3DCUDABackward(x, grid, grad_out, mode, padding_mode, + align_corners); +} + +std::vector> +GridSample3DInferShape(std::vector x_shape, + std::vector grid_shape) { + return { + {x_shape[0], x_shape[1], grid_shape[1], grid_shape[2], grid_shape[3]}}; +} + +std::vector> +GridSample3DInferBackShape(std::vector x_shape, + std::vector grid_shape) { + return {x_shape}; +} + +std::vector +GridSample3DInferDtype(paddle::DataType x_dtype, paddle::DataType grid_dtype) { + return {x_dtype}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer + +PD_BUILD_OP(grid_sample_3d) + .Inputs({"x", "grid"}) + .Attrs({"mode: std::string", "padding_mode: std::string", + "align_corners: bool"}) + .Outputs({"out"}) + .SetKernelFn(PD_KERNEL(ultrainfer::paddle_custom_ops::GridSample3DForward)) + .SetInferShapeFn( + PD_INFER_SHAPE(ultrainfer::paddle_custom_ops::GridSample3DInferShape)) + .SetInferDtypeFn( + PD_INFER_DTYPE(ultrainfer::paddle_custom_ops::GridSample3DInferDtype)); + +PD_BUILD_GRAD_OP(grid_sample_3d) + .Inputs({"x", "grid", paddle::Grad("out")}) + .Attrs({"mode: std::string", "padding_mode: std::string", + "align_corners: bool"}) + .Outputs({paddle::Grad("x")}) + .SetKernelFn(PD_KERNEL(ultrainfer::paddle_custom_ops::GridSample3DBackward)) + .SetInferShapeFn(PD_INFER_SHAPE( + ultrainfer::paddle_custom_ops::GridSample3DInferBackShape)); + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cu b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cu new file mode 100755 index 0000000000..d8847de863 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.cu @@ -0,0 +1,658 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
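GridSample3DInferShape above encodes the usual grid-sample contract: the output keeps the batch and channel dims of x and takes its spatial extent from the sampling grid, which is laid out as [N, D_out, H_out, W_out, 3]. A standalone restatement of that rule, for reference:

#include <cstdint>
#include <vector>

// Output shape is {N, C, D_out, H_out, W_out}.
std::vector<int64_t> GridSample3DOutShape(const std::vector<int64_t>& x_shape,
                                          const std::vector<int64_t>& grid_shape) {
  return {x_shape[0], x_shape[1], grid_shape[1], grid_shape[2], grid_shape[3]};
}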
+ +#include + +#include "grid_sample_3d.h" + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +#define CHECK_INPUT_GPU(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") + +static __forceinline__ __device__ bool +InBounds3D(int64_t d, int64_t h, int64_t w, int64_t D, int64_t H, int64_t W) { + return d >= 0 && d < D && h >= 0 && h < H && w >= 0 && w < W; +} + +#define CUDA_KERNEL_LOOP_TYPE(i, n, index_type) \ + index_type _i_n_d_e_x = blockIdx.x * blockDim.x + threadIdx.x; \ + for (index_type i = _i_n_d_e_x; _i_n_d_e_x < (n); \ + _i_n_d_e_x += blockDim.x * gridDim.x, i = _i_n_d_e_x) + +#define CUDA_KERNEL_LOOP(i, n) CUDA_KERNEL_LOOP_TYPE(i, n, int) + +template +static __forceinline__ __device__ T Unnormalize(T coord, int size, + bool align_corners) { + if (align_corners) { + return ((coord + 1.f) / 2) * (size - 1); + } else { + return ((coord + 1.f) * size - 1) / 2; + } +} + +template +static __forceinline__ __device__ T ClipIndexes(T in, int max_value) { + return min(static_cast(max_value), max(in, static_cast(0))); +} + +template +static __forceinline__ __device__ T ReflectIndexes(T in, int twice_low, + int twice_high) { + if (twice_low == twice_high) { + return static_cast(0); + } + T min = static_cast(twice_low) / 2; + T span = static_cast(twice_high - twice_low) / 2; + in = fabs(in - min); + T extra = fmod(in, span); + int flips = static_cast(floor(in / span)); + if (flips % 2 == 0) { + return extra + min; + } else { + return span - extra + min; + } +} + +template +static __forceinline__ __device__ T ComputePositions(T coord, int size, + PaddingMode padding_mode, + bool align_corners) { + coord = Unnormalize(coord, size, align_corners); + if (padding_mode == PaddingMode::border) { + coord = ClipIndexes(coord, size - 1); + } else if (padding_mode == PaddingMode::reflect) { + if (align_corners) { + coord = ReflectIndexes(coord, 0, 2 * (size - 1)); + } else { + coord = ReflectIndexes(coord, -1, 2 * size - 1); + } + coord = ClipIndexes(coord, size - 1); + } + return coord; +} + +template +__global__ void +GridSample3DCudaKernel(const index_t nthreads, index_t out_c, index_t out_d, + index_t out_h, index_t out_w, index_t in_d, index_t in_h, + index_t in_w, const T *input, const T *grid, T *output, + const Mode interpolation_mode, + const PaddingMode padding_mode, bool align_corners) { + // printf("size: %d, %d, %d, %d, %d, %d \n", out_c, out_d, out_w, out_h, in_d, + // in_w); + index_t inp_sW = 1; + index_t inp_sH = in_w; + index_t inp_sD = in_h * in_w; + index_t inp_sC = in_d * inp_sD; + index_t inp_sN = out_c * inp_sC; + + index_t grid_sCoor = 1; + index_t grid_sW = 3; + index_t grid_sH = out_w * grid_sW; + index_t grid_sD = out_h * grid_sH; + index_t grid_sN = out_d * grid_sD; + + index_t out_sW = 1; + index_t out_sH = out_w; + index_t out_sD = out_h * out_w; + index_t out_sC = out_d * out_sD; + index_t out_sN = out_c * out_sC; + + CUDA_KERNEL_LOOP_TYPE(index, nthreads, index_t) { + const index_t w = index % out_w; + const index_t h = (index / out_w) % out_h; + const index_t d = (index / (out_h * out_w)) % out_d; + const index_t n = index / (out_d * out_h * out_w); + const index_t grid_offset = + n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW; + // get the corresponding input x, y, z co-ordinates from grid + T ix = grid[grid_offset]; + 
T iy = grid[grid_offset + grid_sCoor]; + T iz = grid[grid_offset + 2 * grid_sCoor]; + ix = ComputePositions(ix, in_w, padding_mode, align_corners); + iy = ComputePositions(iy, in_h, padding_mode, align_corners); + iz = ComputePositions(iz, in_d, padding_mode, align_corners); + // printf("ix: %f, iy: %f, iz: %f \n", ix, iy, iz); + if (interpolation_mode == Mode::bilinear) { + // get corner pixel values from (x, y, z) + // for 4d, we used north-east-south-west + // for 5d, we add top-bottom + index_t ix_tnw = static_cast(std::floor(ix)); + index_t iy_tnw = static_cast(std::floor(iy)); + index_t iz_tnw = static_cast(std::floor(iz)); + + index_t ix_tne = ix_tnw + 1; + index_t iy_tne = iy_tnw; + index_t iz_tne = iz_tnw; + + index_t ix_tsw = ix_tnw; + index_t iy_tsw = iy_tnw + 1; + index_t iz_tsw = iz_tnw; + + index_t ix_tse = ix_tnw + 1; + index_t iy_tse = iy_tnw + 1; + index_t iz_tse = iz_tnw; + + index_t ix_bnw = ix_tnw; + index_t iy_bnw = iy_tnw; + index_t iz_bnw = iz_tnw + 1; + + index_t ix_bne = ix_tnw + 1; + index_t iy_bne = iy_tnw; + index_t iz_bne = iz_tnw + 1; + + index_t ix_bsw = ix_tnw; + index_t iy_bsw = iy_tnw + 1; + index_t iz_bsw = iz_tnw + 1; + + index_t ix_bse = ix_tnw + 1; + index_t iy_bse = iy_tnw + 1; + index_t iz_bse = iz_tnw + 1; + + // get surfaces to each neighbor: + T tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz); + T tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz); + T tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz); + T tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz); + T bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse); + T bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw); + T bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne); + T bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw); + + auto inp_ptr_NC = input + n * inp_sN; + auto out_ptr_NCDHW = + output + n * out_sN + d * out_sD + h * out_sH + w * out_sW; + for (index_t c = 0; c < out_c; + ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) { + *out_ptr_NCDHW = static_cast(0); + if (InBounds3D(iz_tnw, iy_tnw, ix_tnw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tnw * inp_sD + iy_tnw * inp_sH + ix_tnw * inp_sW] * + tnw; + } + if (InBounds3D(iz_tne, iy_tne, ix_tne, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tne * inp_sD + iy_tne * inp_sH + ix_tne * inp_sW] * + tne; + } + if (InBounds3D(iz_tsw, iy_tsw, ix_tsw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tsw * inp_sD + iy_tsw * inp_sH + ix_tsw * inp_sW] * + tsw; + } + if (InBounds3D(iz_tse, iy_tse, ix_tse, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tse * inp_sD + iy_tse * inp_sH + ix_tse * inp_sW] * + tse; + } + if (InBounds3D(iz_bnw, iy_bnw, ix_bnw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bnw * inp_sD + iy_bnw * inp_sH + ix_bnw * inp_sW] * + bnw; + } + if (InBounds3D(iz_bne, iy_bne, ix_bne, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bne * inp_sD + iy_bne * inp_sH + ix_bne * inp_sW] * + bne; + } + if (InBounds3D(iz_bsw, iy_bsw, ix_bsw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bsw * inp_sD + iy_bsw * inp_sH + ix_bsw * inp_sW] * + bsw; + } + if (InBounds3D(iz_bse, iy_bse, ix_bse, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bse * inp_sD + iy_bse * inp_sH + ix_bse * inp_sW] * + bse; + } + } + } else if (interpolation_mode == Mode::nearest) { + index_t ix_nearest = static_cast(std::round(ix)); + index_t iy_nearest = static_cast(std::round(iy)); + index_t iz_nearest = static_cast(std::round(iz)); + + // assign nearest neighor pixel value 
to output pixel + auto inp_ptr_NC = input + n * inp_sN; + auto out_ptr_NCDHW = + output + n * out_sN + d * out_sD + h * out_sH + w * out_sW; + for (index_t c = 0; c < out_c; + ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) { + if (InBounds3D(iz_nearest, iy_nearest, ix_nearest, in_d, in_h, in_w)) { + *out_ptr_NCDHW = + inp_ptr_NC[iz_nearest * inp_sD + iy_nearest * inp_sH + + ix_nearest * inp_sW]; + } else { + *out_ptr_NCDHW = static_cast(0); + } + } + } + } +} + +std::vector +GridSample3DCUDAForward(const paddle::Tensor &x, const paddle::Tensor &grid, + const std::string &mode, + const std::string &padding_mode, bool align_corners) { + CHECK_INPUT_GPU(x); + CHECK_INPUT_GPU(grid); + PaddingMode enum_padding_mode; + Mode enum_mode; + if (padding_mode == "border") { + enum_padding_mode = PaddingMode::border; + } else if (padding_mode == "reflection") { + enum_padding_mode = PaddingMode::reflect; + } else { + enum_padding_mode = PaddingMode::zeros; + } + + if (mode == "nearest") { + enum_mode = Mode::nearest; + } else { + enum_mode = Mode::bilinear; + } + const int n = grid.shape()[0]; + const int out_d = grid.shape()[1]; + const int out_h = grid.shape()[2]; + const int out_w = grid.shape()[3]; + const int c = x.shape()[1]; + const int in_d = x.shape()[2]; + const int in_h = x.shape()[3]; + const int in_w = x.shape()[4]; + + auto output = paddle::full({n, c, out_d, out_h, out_w}, 0, + paddle::DataType::FLOAT32, paddle::GPUPlace()); + const int count = static_cast(n * out_d * out_h * out_w); + + int max_threads_per_block = 512; + int block_num = (count - 1) / max_threads_per_block + 1; + // printf("size: %d, %d, %d, %d, %d, %d \n", n, c, out_d, out_h, count, + // block_num); + GridSample3DCudaKernel + <<>>( + count, c, out_d, out_h, out_w, in_d, in_h, in_w, x.data(), + grid.data(), output.data(), enum_mode, + enum_padding_mode, align_corners); + + cudaError_t error_check; + error_check = cudaGetLastError(); + if (error_check != cudaSuccess) { + printf("%s\n", cudaGetErrorString(error_check)); + } + // printf("size: %d, %d, %d, %d, %d, %d \n", n, c, out_d, out_h, count, + // block_num); + return {output}; +} + +template +static __forceinline__ __device__ T UnnormalizeWithMask(T coord, int size, + bool align_corners, + T *grad_in) { + if (align_corners) { + *grad_in = static_cast(size - 1) / 2; + return ((coord + 1.f) / 2) * (size - 1); + } else { + *grad_in = static_cast(size) / 2; + return ((coord + 1.f) * size - 1) / 2; + } +} + +template +static __forceinline__ __device__ T ClipIndexesWithMask(T in, int clip_limit, + T *grad_in) { + if (in <= static_cast(0)) { + *grad_in = static_cast(0); + return static_cast(0); + } else { + T max = static_cast(clip_limit - 1); + if (in >= max) { + *grad_in = static_cast(0); + return max; + } else { + *grad_in = static_cast(1); + return in; + } + } +} + +template +static __forceinline__ __device__ T ReflectIndexesWithMask(T in, int twice_low, + int twice_high, + T *grad_in) { + if (twice_low == twice_high) { + *grad_in = static_cast(0); + return static_cast(0); + } + int grad_in_mult_; + T min = static_cast(twice_low) / 2; + T span = static_cast(twice_high - twice_low) / 2; + in = in - min; + if (in < static_cast(0)) { + grad_in_mult_ = -1; + in = -in; + } else { + grad_in_mult_ = 1; + } + T extra = fmod(in, span); + int flips = static_cast(floor(in / span)); + if (flips % 2 == 0) { + *grad_in = static_cast(grad_in_mult_); + return extra + min; + } else { + *grad_in = static_cast(-grad_in_mult_); + return span - extra + min; + } +} + +template +static 
__forceinline__ __device__ T +ComputePositionsWithMask(T coord, int size, PaddingMode padding_mode, + bool align_corners, T *grad_in) { + T grad_clip, grad_refl; + coord = UnnormalizeWithMask(coord, size, align_corners, grad_in); + if (padding_mode == PaddingMode::border) { + coord = ClipIndexesWithMask(coord, size, &grad_clip); + *grad_in = (*grad_in) * grad_clip; + } else if (padding_mode == PaddingMode::reflect) { + if (align_corners) { + coord = ReflectIndexesWithMask(coord, 0, 2 * (size - 1), &grad_refl); + } else { + coord = ReflectIndexesWithMask(coord, -1, 2 * size - 1, &grad_refl); + } + coord = ClipIndexesWithMask(coord, size, &grad_clip); + *grad_in = (*grad_in) * grad_refl * grad_clip; + } + + return coord; +} + +template +static __forceinline__ __device__ void +AtomicAdd3D(T *data, int64_t d, int64_t h, int64_t w, int64_t sD, int64_t sH, + int64_t sW, int64_t D, int64_t H, int64_t W, T delta) { + if (InBounds3D(d, h, w, D, H, W)) { + atomicAdd(data + d * sD + h * sH + w * sW, delta); + } +} + +template +__global__ void GridSample3DCudaBackwardKernel( + const index_t nthreads, const T *grad_output, const T *input, const T *grid, + index_t out_c, index_t out_d, index_t out_h, index_t out_w, index_t in_d, + index_t in_h, index_t in_w, T *grad_input, T *grad_grid, const Mode mode, + const PaddingMode padding_mode, bool align_corners) { + index_t inp_sW = 1; + index_t inp_sH = in_w; + index_t inp_sD = in_h * in_w; + index_t inp_sC = in_d * inp_sD; + index_t inp_sN = out_c * inp_sC; + + index_t grid_sCoor = 1; + index_t grid_sW = 3; + index_t grid_sH = out_w * grid_sW; + index_t grid_sD = out_h * grid_sH; + index_t grid_sN = out_d * grid_sD; + + index_t gOut_sW = 1; + index_t gOut_sH = out_w; + index_t gOut_sD = out_h * out_w; + index_t gOut_sC = out_d * gOut_sD; + index_t gOut_sN = out_c * gOut_sC; + + CUDA_KERNEL_LOOP_TYPE(index, nthreads, index_t) { + const index_t w = index % out_w; + const index_t h = (index / out_w) % out_h; + const index_t d = (index / (out_h * out_w)) % out_d; + const index_t n = index / (out_d * out_h * out_w); + const auto grid_offset = + n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW; + + // get the corresponding input x, y, z co-ordinates from grid + T ix = grid[grid_offset]; + T iy = grid[grid_offset + grid_sCoor]; + T iz = grid[grid_offset + 2 * grid_sCoor]; + + // multipliers for gradients on ix, iy, and iz + T gix_mult, giy_mult, giz_mult; + ix = ComputePositionsWithMask(ix, in_w, padding_mode, align_corners, + &gix_mult); + iy = ComputePositionsWithMask(iy, in_h, padding_mode, align_corners, + &giy_mult); + iz = ComputePositionsWithMask(iz, in_d, padding_mode, align_corners, + &giz_mult); + + if (mode == Mode::bilinear) { + // get corner pixel values from (x, y, z) + // for 4d, we used north-east-south-west + // for 5d, we add top-bottom + index_t ix_tnw = static_cast(std::floor(ix)); + index_t iy_tnw = static_cast(std::floor(iy)); + index_t iz_tnw = static_cast(std::floor(iz)); + + index_t ix_tne = ix_tnw + 1; + index_t iy_tne = iy_tnw; + index_t iz_tne = iz_tnw; + + index_t ix_tsw = ix_tnw; + index_t iy_tsw = iy_tnw + 1; + index_t iz_tsw = iz_tnw; + + index_t ix_tse = ix_tnw + 1; + index_t iy_tse = iy_tnw + 1; + index_t iz_tse = iz_tnw; + + index_t ix_bnw = ix_tnw; + index_t iy_bnw = iy_tnw; + index_t iz_bnw = iz_tnw + 1; + + index_t ix_bne = ix_tnw + 1; + index_t iy_bne = iy_tnw; + index_t iz_bne = iz_tnw + 1; + + index_t ix_bsw = ix_tnw; + index_t iy_bsw = iy_tnw + 1; + index_t iz_bsw = iz_tnw + 1; + + index_t ix_bse = ix_tnw + 1; + 
index_t iy_bse = iy_tnw + 1; + index_t iz_bse = iz_tnw + 1; + + // get surfaces to each neighbor: + T tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz); + T tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz); + T tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz); + T tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz); + T bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse); + T bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw); + T bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne); + T bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw); + + T gix = static_cast(0), giy = static_cast(0), + giz = static_cast(0); + index_t gOut_offset = + n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW; + index_t inp_offset_NC = n * inp_sN; + T *gInp_ptr_NC = grad_input + n * inp_sN; + for (index_t c = 0; c < out_c; ++c, gOut_offset += gOut_sC, + gInp_ptr_NC += inp_sC, inp_offset_NC += inp_sC) { + T gOut = grad_output[gOut_offset]; + + AtomicAdd3D(gInp_ptr_NC, iz_tnw, iy_tnw, ix_tnw, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, tnw * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_tne, iy_tne, ix_tne, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, tne * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_tsw, iy_tsw, ix_tsw, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, tsw * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_tse, iy_tse, ix_tse, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, tse * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_bnw, iy_bnw, ix_bnw, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, bnw * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_bne, iy_bne, ix_bne, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, bne * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_bsw, iy_bsw, ix_bsw, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, bsw * gOut); + AtomicAdd3D(gInp_ptr_NC, iz_bse, iy_bse, ix_bse, inp_sD, inp_sH, inp_sW, + in_d, in_h, in_w, bse * gOut); + + // calculate grad_grid + if (InBounds3D(iz_tnw, iy_tnw, ix_tnw, in_d, in_h, in_w)) { + T tnw_val = input[inp_offset_NC + iz_tnw * inp_sD + iy_tnw * inp_sH + + ix_tnw * inp_sW]; + gix -= tnw_val * (iy_bse - iy) * (iz_bse - iz) * gOut; + giy -= tnw_val * (ix_bse - ix) * (iz_bse - iz) * gOut; + giz -= tnw_val * (ix_bse - ix) * (iy_bse - iy) * gOut; + } + if (InBounds3D(iz_tne, iy_tne, ix_tne, in_d, in_h, in_w)) { + T tne_val = input[inp_offset_NC + iz_tne * inp_sD + iy_tne * inp_sH + + ix_tne * inp_sW]; + gix += tne_val * (iy_bsw - iy) * (iz_bsw - iz) * gOut; + giy -= tne_val * (ix - ix_bsw) * (iz_bsw - iz) * gOut; + giz -= tne_val * (ix - ix_bsw) * (iy_bsw - iy) * gOut; + } + if (InBounds3D(iz_tsw, iy_tsw, ix_tsw, in_d, in_h, in_w)) { + T tsw_val = input[inp_offset_NC + iz_tsw * inp_sD + iy_tsw * inp_sH + + ix_tsw * inp_sW]; + gix -= tsw_val * (iy - iy_bne) * (iz_bne - iz) * gOut; + giy += tsw_val * (ix_bne - ix) * (iz_bne - iz) * gOut; + giz -= tsw_val * (ix_bne - ix) * (iy - iy_bne) * gOut; + } + if (InBounds3D(iz_tse, iy_tse, ix_tse, in_d, in_h, in_w)) { + T tse_val = input[inp_offset_NC + iz_tse * inp_sD + iy_tse * inp_sH + + ix_tse * inp_sW]; + gix += tse_val * (iy - iy_bnw) * (iz_bnw - iz) * gOut; + giy += tse_val * (ix - ix_bnw) * (iz_bnw - iz) * gOut; + giz -= tse_val * (ix - ix_bnw) * (iy - iy_bnw) * gOut; + } + if (InBounds3D(iz_bnw, iy_bnw, ix_bnw, in_d, in_h, in_w)) { + T bnw_val = input[inp_offset_NC + iz_bnw * inp_sD + iy_bnw * inp_sH + + ix_bnw * inp_sW]; + gix -= bnw_val * (iy_tse - iy) * (iz - iz_tse) * gOut; + giy -= bnw_val * (ix_tse - ix) * (iz - iz_tse) * gOut; + giz += bnw_val * (ix_tse - ix) * (iy_tse - iy) * gOut; + } + if (InBounds3D(iz_bne, iy_bne, ix_bne, in_d, in_h, in_w)) { + T bne_val 
= input[inp_offset_NC + iz_bne * inp_sD + iy_bne * inp_sH + + ix_bne * inp_sW]; + gix += bne_val * (iy_tsw - iy) * (iz - iz_tsw) * gOut; + giy -= bne_val * (ix - ix_tsw) * (iz - iz_tsw) * gOut; + giz += bne_val * (ix - ix_tsw) * (iy_tsw - iy) * gOut; + } + if (InBounds3D(iz_bsw, iy_bsw, ix_bsw, in_d, in_h, in_w)) { + T bsw_val = input[inp_offset_NC + iz_bsw * inp_sD + iy_bsw * inp_sH + + ix_bsw * inp_sW]; + gix -= bsw_val * (iy - iy_tne) * (iz - iz_tne) * gOut; + giy += bsw_val * (ix_tne - ix) * (iz - iz_tne) * gOut; + giz += bsw_val * (ix_tne - ix) * (iy - iy_tne) * gOut; + } + if (InBounds3D(iz_bse, iy_bse, ix_bse, in_d, in_h, in_w)) { + T bse_val = input[inp_offset_NC + iz_bse * inp_sD + iy_bse * inp_sH + + ix_bse * inp_sW]; + gix += bse_val * (iy - iy_tnw) * (iz - iz_tnw) * gOut; + giy += bse_val * (ix - ix_tnw) * (iz - iz_tnw) * gOut; + giz += bse_val * (ix - ix_tnw) * (iy - iy_tnw) * gOut; + } + } + if (grad_grid != nullptr) { + T *gGrid_ptr_NDHW = grad_grid + index * grid_sW; + gGrid_ptr_NDHW[0] = gix_mult * gix; + gGrid_ptr_NDHW[1] = giy_mult * giy; + gGrid_ptr_NDHW[2] = giz_mult * giz; + } + } else if (mode == Mode::nearest) { + auto ix_nearest = static_cast(std::round(ix)); + auto iy_nearest = static_cast(std::round(iy)); + auto iz_nearest = static_cast(std::round(iz)); + + // assign nearest neighor pixel value to output pixel + index_t gOut_offset = + n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW; + T *gInp_ptr_NC = grad_input + n * inp_sN; + for (index_t c = 0; c < out_c; + ++c, gOut_offset += gOut_sC, gInp_ptr_NC += inp_sC) { + AtomicAdd3D(gInp_ptr_NC, iz_nearest, iy_nearest, ix_nearest, inp_sD, + inp_sH, inp_sW, in_d, in_h, in_w, grad_output[gOut_offset]); + } + if (grad_grid != nullptr) { + T *gGrid_ptr_NDHW = grad_grid + index * grid_sW; + gGrid_ptr_NDHW[0] = static_cast(0); + gGrid_ptr_NDHW[1] = static_cast(0); + gGrid_ptr_NDHW[2] = static_cast(0); + } + } + } +} + +std::vector +GridSample3DCUDABackward(const paddle::Tensor &x, const paddle::Tensor &grid, + const paddle::Tensor &grad_out, + const std::string &mode, + const std::string &padding_mode, bool align_corners) { + PaddingMode enum_padding_mode; + Mode enum_mode; + if (padding_mode == "border") { + enum_padding_mode = PaddingMode::border; + } else if (padding_mode == "reflection") { + enum_padding_mode = PaddingMode::reflect; + } else { + enum_padding_mode = PaddingMode::zeros; + } + + if (mode == "nearest") { + enum_mode = Mode::nearest; + } else { + enum_mode = Mode::bilinear; + } + + const int out_d = grid.shape()[1]; + const int out_h = grid.shape()[2]; + const int out_w = grid.shape()[3]; + const int n = x.shape()[0]; + const int c = x.shape()[1]; + const int in_d = x.shape()[2]; + const int in_h = x.shape()[3]; + const int in_w = x.shape()[4]; + + auto grid_grad_output = + paddle::empty({n, out_d, out_h, out_w, 3}, paddle::DataType::FLOAT32, + paddle::GPUPlace()); + auto x_grad_output = + paddle::full({n, c, in_d, in_h, in_w}, 0, paddle::DataType::FLOAT32, + paddle::GPUPlace()); + + const int count = static_cast(n * out_d * out_h * out_w); + + int max_threads_per_block = 512; + int block_num = (count - 1) / max_threads_per_block + 1; + + GridSample3DCudaBackwardKernel + <<>>( + count, grad_out.data(), x.data(), grid.data(), c, + out_d, out_h, out_w, in_d, in_h, in_w, x_grad_output.data(), + grid_grad_output.data(), enum_mode, enum_padding_mode, + align_corners); + + return {x_grad_output}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git 
a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.h b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.h new file mode 100755 index 0000000000..9374cb75e8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/grid_sample_3d.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#pragma once + +#include +#include +#include + +namespace ultrainfer { +namespace paddle_custom_ops { + +#define HOST_DEVICE __host__ __device__ +#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__ + +enum class Mode { bilinear, nearest }; + +enum class PaddingMode { zeros, border, reflect }; + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.cc new file mode 100755 index 0000000000..4404e5f345 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.cc @@ -0,0 +1,272 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* +3D Rotated IoU Calculation (CPU) +Written by Shaoshuai Shi +All Rights Reserved 2020. +*/ + +#include "iou3d_cpu.h" +#include +#include +#include + +namespace ultrainfer { +namespace paddle_custom_ops { + +static inline float min(float a, float b) { return a > b ? b : a; } + +static inline float max(float a, float b) { return a > b ? 
a : b; } + +#if defined(_WIN32) +#if defined(EPS) +#undef EPS +#endif +#define EPS 1e-8 +#else +static const float EPS = 1e-8; +#endif + +struct Point { + float x, y; + Point() {} + Point(double _x, double _y) { x = _x, y = _y; } + + void set(float _x, float _y) { + x = _x; + y = _y; + } + + Point operator+(const Point &b) const { return Point(x + b.x, y + b.y); } + + Point operator-(const Point &b) const { return Point(x - b.x, y - b.y); } +}; + +static inline float cross(const Point &a, const Point &b) { + return a.x * b.y - a.y * b.x; +} + +static inline float cross(const Point &p1, const Point &p2, const Point &p0) { + return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y); +} + +static inline int check_rect_cross(const Point &p1, const Point &p2, + const Point &q1, const Point &q2) { + int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) && + min(q1.x, q2.x) <= max(p1.x, p2.x) && + min(p1.y, p2.y) <= max(q1.y, q2.y) && + min(q1.y, q2.y) <= max(p1.y, p2.y); + return ret; +} + +static inline int check_in_box2d(const float *box, const Point &p) { + // params: (7) [x, y, z, dx, dy, dz, heading] + const float MARGIN = 1e-2; + + float center_x = box[0], center_y = box[1]; + float angle_cos = cos(-box[6]), + angle_sin = + sin(-box[6]); // rotate the point in the opposite direction of box + float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin); + float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos; + + return (fabs(rot_x) < box[3] / 2 + MARGIN && + fabs(rot_y) < box[4] / 2 + MARGIN); +} + +static inline int intersection(const Point &p1, const Point &p0, + const Point &q1, const Point &q0, Point &ans) { + // fast exclusion + if (check_rect_cross(p0, p1, q0, q1) == 0) + return 0; + + // check cross standing + float s1 = cross(q0, p1, p0); + float s2 = cross(p1, q1, p0); + float s3 = cross(p0, q1, q0); + float s4 = cross(q1, p1, q0); + + if (!(s1 * s2 > 0 && s3 * s4 > 0)) + return 0; + + // calculate intersection of two lines + float s5 = cross(q1, p1, p0); + if (fabs(s5 - s1) > EPS) { + ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1); + ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1); + + } else { + float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y; + float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y; + float D = a0 * b1 - a1 * b0; + + ans.x = (b0 * c1 - b1 * c0) / D; + ans.y = (a1 * c0 - a0 * c1) / D; + } + + return 1; +} + +static inline void rotate_around_center(const Point ¢er, + const float angle_cos, + const float angle_sin, Point &p) { + float new_x = + (p.x - center.x) * angle_cos + (p.y - center.y) * (-angle_sin) + center.x; + float new_y = + (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y; + p.set(new_x, new_y); +} + +static inline int point_cmp(const Point &a, const Point &b, + const Point ¢er) { + return atan2(a.y - center.y, a.x - center.x) > + atan2(b.y - center.y, b.x - center.x); +} + +static inline float box_overlap(const float *box_a, const float *box_b) { + // params: box_a (7) [x, y, z, dx, dy, dz, heading] + // params: box_b (7) [x, y, z, dx, dy, dz, heading] + + // float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = + // box_a[3], a_angle = box_a[4]; + // float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = + // box_b[3], b_angle = box_b[4]; + float a_angle = box_a[6], b_angle = box_b[6]; + float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2, + a_dy_half = box_a[4] / 2, b_dy_half = box_b[4] / 2; + float a_x1 = box_a[0] - a_dx_half, 
a_y1 = box_a[1] - a_dy_half; + float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half; + float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half; + float b_x2 = box_b[0] + b_dx_half, b_y2 = box_b[1] + b_dy_half; + + Point center_a(box_a[0], box_a[1]); + Point center_b(box_b[0], box_b[1]); + + Point box_a_corners[5]; + box_a_corners[0].set(a_x1, a_y1); + box_a_corners[1].set(a_x2, a_y1); + box_a_corners[2].set(a_x2, a_y2); + box_a_corners[3].set(a_x1, a_y2); + + Point box_b_corners[5]; + box_b_corners[0].set(b_x1, b_y1); + box_b_corners[1].set(b_x2, b_y1); + box_b_corners[2].set(b_x2, b_y2); + box_b_corners[3].set(b_x1, b_y2); + + // get oriented corners + float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle); + float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle); + + for (int k = 0; k < 4; k++) { + rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]); + rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]); + } + + box_a_corners[4] = box_a_corners[0]; + box_b_corners[4] = box_b_corners[0]; + + // get intersection of lines + Point cross_points[16]; + Point poly_center; + int cnt = 0, flag = 0; + + poly_center.set(0, 0); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + flag = intersection(box_a_corners[i + 1], box_a_corners[i], + box_b_corners[j + 1], box_b_corners[j], + cross_points[cnt]); + if (flag) { + poly_center = poly_center + cross_points[cnt]; + cnt++; + } + } + } + + // check corners + for (int k = 0; k < 4; k++) { + if (check_in_box2d(box_a, box_b_corners[k])) { + poly_center = poly_center + box_b_corners[k]; + cross_points[cnt] = box_b_corners[k]; + cnt++; + } + if (check_in_box2d(box_b, box_a_corners[k])) { + poly_center = poly_center + box_a_corners[k]; + cross_points[cnt] = box_a_corners[k]; + cnt++; + } + } + + poly_center.x /= cnt; + poly_center.y /= cnt; + + // sort the points of polygon + Point temp; + for (int j = 0; j < cnt - 1; j++) { + for (int i = 0; i < cnt - j - 1; i++) { + if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)) { + temp = cross_points[i]; + cross_points[i] = cross_points[i + 1]; + cross_points[i + 1] = temp; + } + } + } + + // get the overlap areas + float area = 0; + for (int k = 0; k < cnt - 1; k++) { + area += cross(cross_points[k] - cross_points[0], + cross_points[k + 1] - cross_points[0]); + } + + return fabs(area) / 2.0; +} + +static inline float iou_bev(const float *box_a, const float *box_b) { + // params: box_a (7) [x, y, z, dx, dy, dz, heading] + // params: box_b (7) [x, y, z, dx, dy, dz, heading] + float sa = box_a[3] * box_a[4]; + float sb = box_b[3] * box_b[4]; + float s_overlap = box_overlap(box_a, box_b); + return s_overlap / fmaxf(sa + sb - s_overlap, EPS); +} + +int boxes_iou_bev_cpu(paddle::Tensor boxes_a_tensor, + paddle::Tensor boxes_b_tensor, + paddle::Tensor ans_iou_tensor) { + // params boxes_a_tensor: (N, 7) [x, y, z, dx, dy, dz, heading] + // params boxes_b_tensor: (M, 7) [x, y, z, dx, dy, dz, heading] + // params ans_iou_tensor: (N, M) + + // CHECK_CONTIGUOUS(boxes_a_tensor); + // CHECK_CONTIGUOUS(boxes_b_tensor); + + int num_boxes_a = boxes_a_tensor.shape()[0]; + int num_boxes_b = boxes_b_tensor.shape()[0]; + const float *boxes_a = boxes_a_tensor.data(); + const float *boxes_b = boxes_b_tensor.data(); + float *ans_iou = ans_iou_tensor.data(); + + for (int i = 0; i < num_boxes_a; i++) { + for (int j = 0; j < num_boxes_b; j++) { + ans_iou[i * num_boxes_b + j] = iou_bev(boxes_a + i * 7, boxes_b + j * 7); + } + } + return 1; 
+} + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.h b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.h new file mode 100755 index 0000000000..09fe5b9ccc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_cpu.h @@ -0,0 +1,35 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace paddle_custom_ops { + +ULTRAINFER_DECL int boxes_iou_bev_cpu(paddle::Tensor boxes_a_tensor, + paddle::Tensor boxes_b_tensor, + paddle::Tensor ans_iou_tensor); + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.cc new file mode 100755 index 0000000000..627e603cff --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.cc @@ -0,0 +1,241 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* +3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) +Written by Shaoshuai Shi +All Rights Reserved 2019-2020. +*/ + +#if defined(WITH_GPU) + +#include +#include + +#include "iou3d_nms.h" + +namespace ultrainfer { +namespace paddle_custom_ops { + +#define CHECK_INPUT(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.") +// #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) +static inline int DIVUP(const int m, const int n) { + return ((m) / (n) + ((m) % (n) > 0)); +} + +#define CHECK_ERROR(ans) \ + { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(cudaError_t code, const char *file, int line, + bool abort = true) { + if (code != cudaSuccess) { + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, + line); + if (abort) + exit(code); + } +} + +#define D(x) \ + PD_THROW('\n', x, \ + "\n--------------------------------- where is the error ? 
" \ + "---------------------------------------\n"); + +static const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; + +void boxesoverlapLauncher(const int num_a, const float *boxes_a, + const int num_b, const float *boxes_b, + float *ans_overlap); +void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, + const float *boxes_b, float *ans_iou); +void nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num, + float nms_overlap_thresh); +void nmsNormalLauncher(const float *boxes, unsigned long long *mask, + int boxes_num, float nms_overlap_thresh); + +int boxes_overlap_bev_gpu(paddle::Tensor boxes_a, paddle::Tensor boxes_b, + paddle::Tensor ans_overlap) { + // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] + // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading] + // params ans_overlap: (N, M) + + CHECK_INPUT(boxes_a); + CHECK_INPUT(boxes_b); + CHECK_INPUT(ans_overlap); + + int num_a = boxes_a.shape()[0]; + int num_b = boxes_b.shape()[0]; + + const float *boxes_a_data = boxes_a.data(); + const float *boxes_b_data = boxes_b.data(); + float *ans_overlap_data = ans_overlap.data(); + + boxesoverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data, + ans_overlap_data); + + return 1; +} + +int boxes_iou_bev_gpu(paddle::Tensor boxes_a, paddle::Tensor boxes_b, + paddle::Tensor ans_iou) { + // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] + // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading] + // params ans_overlap: (N, M) + CHECK_INPUT(boxes_a); + CHECK_INPUT(boxes_b); + CHECK_INPUT(ans_iou); + + int num_a = boxes_a.shape()[0]; + int num_b = boxes_b.shape()[0]; + + const float *boxes_a_data = boxes_a.data(); + const float *boxes_b_data = boxes_b.data(); + float *ans_iou_data = ans_iou.data(); + + boxesioubevLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_iou_data); + + return 1; +} + +std::vector nms_gpu(const paddle::Tensor &boxes, + float nms_overlap_thresh) { + // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading] + // params keep: (N) + CHECK_INPUT(boxes); + // CHECK_CONTIGUOUS(keep); + auto keep = paddle::empty({boxes.shape()[0]}, paddle::DataType::INT32, + paddle::CPUPlace()); + auto num_to_keep_tensor = + paddle::empty({1}, paddle::DataType::INT32, paddle::CPUPlace()); + int *num_to_keep_data = num_to_keep_tensor.data(); + + int boxes_num = boxes.shape()[0]; + const float *boxes_data = boxes.data(); + int *keep_data = keep.data(); + + int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); + + unsigned long long *mask_data = NULL; + CHECK_ERROR(cudaMalloc((void **)&mask_data, + boxes_num * col_blocks * sizeof(unsigned long long))); + nmsLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh); + + // unsigned long long mask_cpu[boxes_num * col_blocks]; + // unsigned long long *mask_cpu = new unsigned long long [boxes_num * + // col_blocks]; + std::vector mask_cpu(boxes_num * col_blocks); + + // printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks); + CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, + boxes_num * col_blocks * sizeof(unsigned long long), + cudaMemcpyDeviceToHost)); + + cudaFree(mask_data); + + // WARN(qiuyanjun): codes below will throw a compile error on windows with + // msvc. Thus, we choosed to use std::vectored to store the result instead. 
+ // unsigned long long remv_cpu[col_blocks]; + // memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long)); + std::vector remv_cpu(col_blocks, 0); + + int num_to_keep = 0; + + for (int i = 0; i < boxes_num; i++) { + int nblock = i / THREADS_PER_BLOCK_NMS; + int inblock = i % THREADS_PER_BLOCK_NMS; + + if (!(remv_cpu[nblock] & (1ULL << inblock))) { + keep_data[num_to_keep++] = i; + unsigned long long *p = &mask_cpu[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv_cpu[j] |= p[j]; + } + } + } + + num_to_keep_data[0] = num_to_keep; + + if (cudaSuccess != cudaGetLastError()) + printf("Error!\n"); + + return {keep, num_to_keep_tensor}; +} + +int nms_normal_gpu(paddle::Tensor boxes, paddle::Tensor keep, + float nms_overlap_thresh) { + // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading] + // params keep: (N) + + CHECK_INPUT(boxes); + // CHECK_CONTIGUOUS(keep); + + int boxes_num = boxes.shape()[0]; + const float *boxes_data = boxes.data(); + // WARN(qiuyanjun): long type for Tensor::data() API is not exported by + // paddle, it will raise some link error on windows with msvc. Please check: + // https://github.com/PaddlePaddle/Paddle/blob/release/2.5/paddle/phi/api/lib/tensor.cc +#if defined(_WIN32) + int *keep_data = keep.data(); +#else + long *keep_data = keep.data(); +#endif + + int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); + + unsigned long long *mask_data = NULL; + CHECK_ERROR(cudaMalloc((void **)&mask_data, + boxes_num * col_blocks * sizeof(unsigned long long))); + nmsNormalLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh); + + // unsigned long long mask_cpu[boxes_num * col_blocks]; + // unsigned long long *mask_cpu = new unsigned long long [boxes_num * + // col_blocks]; + std::vector mask_cpu(boxes_num * col_blocks); + + // printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks); + CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, + boxes_num * col_blocks * sizeof(unsigned long long), + cudaMemcpyDeviceToHost)); + + cudaFree(mask_data); + + // WARN(qiuyanjun): codes below will throw a compile error on windows with + // msvc. Thus, we choosed to use std::vectored to store the result instead. + // unsigned long long remv_cpu[col_blocks]; + // memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long)); + std::vector remv_cpu(col_blocks, 0); + + int num_to_keep = 0; + + for (int i = 0; i < boxes_num; i++) { + int nblock = i / THREADS_PER_BLOCK_NMS; + int inblock = i % THREADS_PER_BLOCK_NMS; + + if (!(remv_cpu[nblock] & (1ULL << inblock))) { + keep_data[num_to_keep++] = i; + unsigned long long *p = &mask_cpu[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv_cpu[j] |= p[j]; + } + } + } + if (cudaSuccess != cudaGetLastError()) + printf("Error!\n"); + + return num_to_keep; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.h b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.h new file mode 100755 index 0000000000..d9c9485366 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms.h @@ -0,0 +1,45 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x)
+#include "paddle/include/experimental/ext_all.h"
+#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x)
+#include "paddle/include/paddle/extension.h"
+#else
+#include "paddle/extension.h"
+#endif
+
+#include "ultrainfer/utils/utils.h"
+
+#if defined(WITH_GPU)
+namespace ultrainfer {
+namespace paddle_custom_ops {
+
+ULTRAINFER_DECL int boxes_overlap_bev_gpu(paddle::Tensor boxes_a,
+                                          paddle::Tensor boxes_b,
+                                          paddle::Tensor ans_overlap);
+ULTRAINFER_DECL int boxes_iou_bev_gpu(paddle::Tensor boxes_a,
+                                      paddle::Tensor boxes_b,
+                                      paddle::Tensor ans_iou);
+ULTRAINFER_DECL std::vector<paddle::Tensor> nms_gpu(const paddle::Tensor &boxes,
+                                                    float nms_overlap_thresh);
+ULTRAINFER_DECL int nms_normal_gpu(paddle::Tensor boxes, paddle::Tensor keep,
+                                   float nms_overlap_thresh);
+
+} // namespace paddle_custom_ops
+} // namespace ultrainfer
+
+#endif
diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_api.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_api.cc
new file mode 100755
index 0000000000..ac0df58a00
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_api.cc
@@ -0,0 +1,56 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
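+//
+// Registration glue for the 3D NMS custom op: NMSInferShape / NMSInferDtype
+// describe the two outputs ("keep" indices and the number of boxes kept), and
+// PD_BUILD_OP(nms_gpu) exposes the CUDA kernel to Paddle Inference when the
+// library is built with WITH_GPU.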
+ +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +#include + +#include "iou3d_cpu.h" +#include "iou3d_nms.h" + +namespace ultrainfer { +namespace paddle_custom_ops { + +std::vector> +NMSInferShape(std::vector boxes_shape) { + int64_t keep_num = 1; + return {{boxes_shape[0]}, {keep_num}}; +} + +std::vector NMSInferDtype(paddle::DataType boxes_dtype) { + return {paddle::DataType::INT64, paddle::DataType::INT64}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer + +#if defined(WITH_GPU) + +PD_BUILD_OP(nms_gpu) + .Inputs({"boxes"}) + .Outputs({"keep", "num_to_keep"}) + .Attrs({"nms_overlap_thresh: float"}) + .SetKernelFn(PD_KERNEL(ultrainfer::paddle_custom_ops::nms_gpu)) + .SetInferDtypeFn( + PD_INFER_DTYPE(ultrainfer::paddle_custom_ops::NMSInferDtype)) + .SetInferShapeFn( + PD_INFER_SHAPE(ultrainfer::paddle_custom_ops::NMSInferShape)); + +#endif diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_kernel.cu b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_kernel.cu new file mode 100755 index 0000000000..fee08b8dfb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/iou3d_nms_kernel.cu @@ -0,0 +1,588 @@ +/* +3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) +Written by Shaoshuai Shi +All Rights Reserved 2019-2020. +*/ +#include + +namespace ultrainfer { +namespace paddle_custom_ops { + +// #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) +__host__ __device__ static inline int DIVUP(const int m, const int n) { + return ((m) / (n) + ((m) % (n) > 0)); +} + +static const int THREADS_PER_BLOCK = 16; +static const int THREADS_PER_BLOCK_NMS = sizeof(int64_t) * 8; +#if defined(_WIN32) +#if defined(EPS) +#undef EPS +#endif +#define EPS 1e-8 +#else +static const float EPS = 1e-8; +#endif + +struct Point { + float x, y; + __device__ Point() {} + __device__ Point(double _x, double _y) { x = _x, y = _y; } + + __device__ void set(float _x, float _y) { + x = _x; + y = _y; + } + + __device__ Point operator+(const Point &b) const { + return Point(x + b.x, y + b.y); + } + + __device__ Point operator-(const Point &b) const { + return Point(x - b.x, y - b.y); + } +}; + +__device__ inline float cross(const Point &a, const Point &b) { + return a.x * b.y - a.y * b.x; +} + +__device__ inline float cross(const Point &p1, const Point &p2, + const Point &p0) { + return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y); +} + +__device__ int check_rect_cross(const Point &p1, const Point &p2, + const Point &q1, const Point &q2) { + int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) && + min(q1.x, q2.x) <= max(p1.x, p2.x) && + min(p1.y, p2.y) <= max(q1.y, q2.y) && + min(q1.y, q2.y) <= max(p1.y, p2.y); + return ret; +} + +__device__ inline int check_in_box2d(const float *box, const Point &p) { + // params: (7) [x, y, z, dx, dy, dz, heading] + const float MARGIN = 1e-2; + // Align with the setting of mmdet3d + // const float MARGIN = 1e-5; + + float center_x = box[0], center_y = box[1]; + float angle_cos = cos(-box[6]), + angle_sin = + sin(-box[6]); // rotate the point in the opposite direction of box + float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin); + float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos; + + return (fabs(rot_x) < box[3] / 2 + MARGIN && + fabs(rot_y) < 
box[4] / 2 + MARGIN); + // Align with the implement of mmdet3d + // float rot_x = + // (p.x - center_x) * angle_cos + (p.y - center_y) * angle_sin + center_x; + // float rot_y = + // -(p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos + + // center_y; + // float x1 = center_x - box[3] / 2; + // float x2 = center_x + box[3] / 2; + // float y1 = center_y - box[4] / 2; + // float y2 = center_y + box[4] / 2; + // return (rot_x > x1 - MARGIN && rot_x < x2 + MARGIN && rot_y > y1 - MARGIN + // && + // rot_y < y2 + MARGIN); +} + +__device__ inline int intersection(const Point &p1, const Point &p0, + const Point &q1, const Point &q0, + Point &ans) { + // fast exclusion + if (check_rect_cross(p0, p1, q0, q1) == 0) + return 0; + + // check cross standing + float s1 = cross(q0, p1, p0); + float s2 = cross(p1, q1, p0); + float s3 = cross(p0, q1, q0); + float s4 = cross(q1, p1, q0); + + if (!(s1 * s2 > 0 && s3 * s4 > 0)) + return 0; + + // calculate intersection of two lines + float s5 = cross(q1, p1, p0); + if (fabs(s5 - s1) > EPS) { + ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1); + ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1); + + } else { + float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y; + float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y; + float D = a0 * b1 - a1 * b0; + + ans.x = (b0 * c1 - b1 * c0) / D; + ans.y = (a1 * c0 - a0 * c1) / D; + } + + return 1; +} + +__device__ inline void rotate_around_center(const Point ¢er, + const float angle_cos, + const float angle_sin, Point &p) { + // float new_x = (p.x - center.x) * angle_cos + (p.y - center.y) * + // (-angle_sin) + center.x; + // float new_y = (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + + // center.y; + // p.set(new_x, new_y); + // Aligh with the implement of mmdet3d + float new_x = + (p.x - center.x) * angle_cos + (p.y - center.y) * angle_sin + center.x; + float new_y = + -(p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y; + p.set(new_x, new_y); +} + +__device__ inline int point_cmp(const Point &a, const Point &b, + const Point ¢er) { + return atan2(a.y - center.y, a.x - center.x) > + atan2(b.y - center.y, b.x - center.x); +} + +__device__ inline float box_overlap(const float *box_a, const float *box_b) { + // params box_a: [x, y, z, dx, dy, dz, heading] + // params box_b: [x, y, z, dx, dy, dz, heading] + + float a_angle = box_a[6], b_angle = box_b[6]; + float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2, + a_dy_half = box_a[4] / 2, b_dy_half = box_b[4] / 2; + float a_x1 = box_a[0] - a_dx_half, a_y1 = box_a[1] - a_dy_half; + float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half; + float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half; + float b_x2 = box_b[0] + b_dx_half, b_y2 = box_b[1] + b_dy_half; + + Point center_a(box_a[0], box_a[1]); + Point center_b(box_b[0], box_b[1]); + +#ifdef DEBUG + printf( + "a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)\n", + a_x1, a_y1, a_x2, a_y2, a_angle, b_x1, b_y1, b_x2, b_y2, b_angle); + printf("center a: (%.3f, %.3f), b: (%.3f, %.3f)\n", center_a.x, center_a.y, + center_b.x, center_b.y); +#endif + + Point box_a_corners[5]; + box_a_corners[0].set(a_x1, a_y1); + box_a_corners[1].set(a_x2, a_y1); + box_a_corners[2].set(a_x2, a_y2); + box_a_corners[3].set(a_x1, a_y2); + + Point box_b_corners[5]; + box_b_corners[0].set(b_x1, b_y1); + box_b_corners[1].set(b_x2, b_y1); + box_b_corners[2].set(b_x2, b_y2); + box_b_corners[3].set(b_x1, b_y2); + + // get oriented corners + 
float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle); + float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle); + + for (int k = 0; k < 4; k++) { +#ifdef DEBUG + printf("before corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, + box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, + box_b_corners[k].y); +#endif + rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]); + rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]); +#ifdef DEBUG + printf("corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x, + box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y); +#endif + } + + box_a_corners[4] = box_a_corners[0]; + box_b_corners[4] = box_b_corners[0]; + + // get intersection of lines + Point cross_points[16]; + Point poly_center; + int cnt = 0, flag = 0; + + poly_center.set(0, 0); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + flag = intersection(box_a_corners[i + 1], box_a_corners[i], + box_b_corners[j + 1], box_b_corners[j], + cross_points[cnt]); + if (flag) { + poly_center = poly_center + cross_points[cnt]; + cnt++; +#ifdef DEBUG + printf( + "Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, " + "%.3f)->(%.3f, %.3f) \n", + cross_points[cnt - 1].x, cross_points[cnt - 1].y, + box_a_corners[i].x, box_a_corners[i].y, box_a_corners[i + 1].x, + box_a_corners[i + 1].y, box_b_corners[i].x, box_b_corners[i].y, + box_b_corners[i + 1].x, box_b_corners[i + 1].y); +#endif + } + } + } + + // check corners + for (int k = 0; k < 4; k++) { + if (check_in_box2d(box_a, box_b_corners[k])) { + poly_center = poly_center + box_b_corners[k]; + cross_points[cnt] = box_b_corners[k]; + cnt++; +#ifdef DEBUG + printf("b corners in a: corner_b(%.3f, %.3f)", cross_points[cnt - 1].x, + cross_points[cnt - 1].y); +#endif + } + if (check_in_box2d(box_b, box_a_corners[k])) { + poly_center = poly_center + box_a_corners[k]; + cross_points[cnt] = box_a_corners[k]; + cnt++; +#ifdef DEBUG + printf("a corners in b: corner_a(%.3f, %.3f)", cross_points[cnt - 1].x, + cross_points[cnt - 1].y); +#endif + } + } + + poly_center.x /= cnt; + poly_center.y /= cnt; + + // sort the points of polygon + Point temp; + for (int j = 0; j < cnt - 1; j++) { + for (int i = 0; i < cnt - j - 1; i++) { + if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)) { + temp = cross_points[i]; + cross_points[i] = cross_points[i + 1]; + cross_points[i + 1] = temp; + } + } + } + +#ifdef DEBUG + printf("cnt=%d\n", cnt); + for (int i = 0; i < cnt; i++) { + printf("All cross point %d: (%.3f, %.3f)\n", i, cross_points[i].x, + cross_points[i].y); + } +#endif + + // get the overlap areas + float area = 0; + for (int k = 0; k < cnt - 1; k++) { + area += cross(cross_points[k] - cross_points[0], + cross_points[k + 1] - cross_points[0]); + } + + return fabs(area) / 2.0; +} + +__device__ inline float iou_bev(const float *box_a, const float *box_b) { + // params box_a: [x, y, z, dx, dy, dz, heading] + // params box_b: [x, y, z, dx, dy, dz, heading] + float sa = box_a[3] * box_a[4]; + float sb = box_b[3] * box_b[4]; + float s_overlap = box_overlap(box_a, box_b); + return s_overlap / fmaxf(sa + sb - s_overlap, EPS); +} + +__global__ void boxes_overlap_kernel(const int num_a, const float *boxes_a, + const int num_b, const float *boxes_b, + float *ans_overlap) { + // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] + // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading] + const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y; + const int 
b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x; + + if (a_idx >= num_a || b_idx >= num_b) { + return; + } + const float *cur_box_a = boxes_a + a_idx * 7; + const float *cur_box_b = boxes_b + b_idx * 7; + float s_overlap = box_overlap(cur_box_a, cur_box_b); + ans_overlap[a_idx * num_b + b_idx] = s_overlap; +} + +__global__ void boxes_iou_bev_kernel(const int num_a, const float *boxes_a, + const int num_b, const float *boxes_b, + float *ans_iou) { + // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] + // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading] + const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y; + const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x; + + if (a_idx >= num_a || b_idx >= num_b) { + return; + } + + const float *cur_box_a = boxes_a + a_idx * 7; + const float *cur_box_b = boxes_b + b_idx * 7; + float cur_iou_bev = iou_bev(cur_box_a, cur_box_b); + ans_iou[a_idx * num_b + b_idx] = cur_iou_bev; +} + +__global__ void nms_kernel(const int boxes_num, const float nms_overlap_thresh, + const float *boxes, unsigned long long *mask) { + // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading] + // params: mask (N, N/THREADS_PER_BLOCK_NMS) + + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + + __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7]; + + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 7 + 0] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 0]; + block_boxes[threadIdx.x * 7 + 1] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 1]; + block_boxes[threadIdx.x * 7 + 2] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 2]; + block_boxes[threadIdx.x * 7 + 3] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 3]; + block_boxes[threadIdx.x * 7 + 4] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 4]; + block_boxes[threadIdx.x * 7 + 5] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 5]; + block_boxes[threadIdx.x * 7 + 6] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 6]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; + const float *cur_box = boxes + cur_box_idx * 7; + + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (iou_bev(cur_box, block_boxes + i * 7) > nms_overlap_thresh) { + t |= 1ULL << i; + } + } + int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); + mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +__device__ inline float iou_normal(float const *const a, float const *const b) { + // params: a: [x, y, z, dx, dy, dz, heading] + // params: b: [x, y, z, dx, dy, dz, heading] + + float left = fmaxf(a[0] - a[3] / 2, b[0] - b[3] / 2), + right = fminf(a[0] + a[3] / 2, b[0] + b[3] / 2); + float top = fmaxf(a[1] - a[4] / 2, b[1] - b[4] / 2), + bottom = fminf(a[1] + a[4] / 2, b[1] + b[4] / 2); + float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f); + float interS = width * height; + float Sa = a[3] * a[4]; + float Sb = b[3] * b[4]; + return interS / fmaxf(Sa + Sb - interS, EPS); +} + +__global__ void 
nms_normal_kernel(const int boxes_num, + const float nms_overlap_thresh, + const float *boxes, + unsigned long long *mask) { + // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading] + // params: mask (N, N/THREADS_PER_BLOCK_NMS) + + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + + __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7]; + + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 7 + 0] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 0]; + block_boxes[threadIdx.x * 7 + 1] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 1]; + block_boxes[threadIdx.x * 7 + 2] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 2]; + block_boxes[threadIdx.x * 7 + 3] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 3]; + block_boxes[threadIdx.x * 7 + 4] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 4]; + block_boxes[threadIdx.x * 7 + 5] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 5]; + block_boxes[threadIdx.x * 7 + 6] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 6]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; + const float *cur_box = boxes + cur_box_idx * 7; + + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (iou_normal(cur_box, block_boxes + i * 7) > nms_overlap_thresh) { + t |= 1ULL << i; + } + } + int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); + mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +void boxesoverlapLauncher(const int num_a, const float *boxes_a, + const int num_b, const float *boxes_b, + float *ans_overlap) { + dim3 blocks( + DIVUP(num_b, THREADS_PER_BLOCK), + DIVUP(num_a, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK); + + boxes_overlap_kernel<<>>(num_a, boxes_a, num_b, boxes_b, + ans_overlap); +#ifdef DEBUG + cudaDeviceSynchronize(); // for using printf in kernel function +#endif +} + +void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, + const float *boxes_b, float *ans_iou) { + dim3 blocks( + DIVUP(num_b, THREADS_PER_BLOCK), + DIVUP(num_a, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK); + + boxes_iou_bev_kernel<<>>(num_a, boxes_a, num_b, boxes_b, + ans_iou); +#ifdef DEBUG + cudaDeviceSynchronize(); // for using printf in kernel function +#endif +} + +void nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num, + float nms_overlap_thresh) { + dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS), + DIVUP(boxes_num, THREADS_PER_BLOCK_NMS)); + dim3 threads(THREADS_PER_BLOCK_NMS); + nms_kernel<<>>(boxes_num, nms_overlap_thresh, boxes, mask); +} + +void nmsNormalLauncher(const float *boxes, unsigned long long *mask, + int boxes_num, float nms_overlap_thresh) { + dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS), + DIVUP(boxes_num, THREADS_PER_BLOCK_NMS)); + dim3 threads(THREADS_PER_BLOCK_NMS); + nms_normal_kernel<<>>(boxes_num, nms_overlap_thresh, boxes, + mask); +} + +__global__ void 
nms_kernel_centerpoint(const int num_bboxes, + const int num_bboxes_for_nms, + const float nms_overlap_thresh, + const int decode_bboxes_dims, + const float *bboxes, const int *index, + const int64_t *sorted_index, + int64_t *mask) { + // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading] + // params: mask (N, N/THREADS_PER_BLOCK_NMS) + + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + fminf(num_bboxes_for_nms - row_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + const int col_size = + fminf(num_bboxes_for_nms - col_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + + __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7]; + + if (threadIdx.x < col_size) { + int box_idx = + index[sorted_index[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x]]; + block_boxes[threadIdx.x * 7 + 0] = bboxes[box_idx * decode_bboxes_dims]; + block_boxes[threadIdx.x * 7 + 1] = bboxes[box_idx * decode_bboxes_dims + 1]; + block_boxes[threadIdx.x * 7 + 2] = bboxes[box_idx * decode_bboxes_dims + 2]; + block_boxes[threadIdx.x * 7 + 3] = bboxes[box_idx * decode_bboxes_dims + 4]; + block_boxes[threadIdx.x * 7 + 4] = bboxes[box_idx * decode_bboxes_dims + 3]; + block_boxes[threadIdx.x * 7 + 5] = bboxes[box_idx * decode_bboxes_dims + 5]; + block_boxes[threadIdx.x * 7 + 6] = + -bboxes[box_idx * decode_bboxes_dims + decode_bboxes_dims - 1] - + 3.141592653589793 / 2; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; + const int act_box_idx = index[sorted_index[cur_box_idx]]; + float cur_box[7]; + cur_box[0] = bboxes[act_box_idx * decode_bboxes_dims]; + cur_box[1] = bboxes[act_box_idx * decode_bboxes_dims + 1]; + cur_box[2] = bboxes[act_box_idx * decode_bboxes_dims + 2]; + cur_box[3] = bboxes[act_box_idx * decode_bboxes_dims + 4]; + cur_box[4] = bboxes[act_box_idx * decode_bboxes_dims + 3]; + cur_box[5] = bboxes[act_box_idx * decode_bboxes_dims + 5]; + cur_box[6] = + -bboxes[act_box_idx * decode_bboxes_dims + decode_bboxes_dims - 1] - + 3.141592653589793 / 2; + + int i = 0; + int64_t t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (iou_bev(cur_box, block_boxes + i * 7) > nms_overlap_thresh) { + t |= 1ULL << i; + } + } + int col_blocks = DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS); + mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +void NmsLauncher(const cudaStream_t &stream, const float *bboxes, + const int *index, const int64_t *sorted_index, + const int num_bboxes, const int num_bboxes_for_nms, + const float nms_overlap_thresh, const int decode_bboxes_dims, + int64_t *mask) { + dim3 blocks(DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS), + DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS)); + dim3 threads(THREADS_PER_BLOCK_NMS); + nms_kernel_centerpoint<<>>( + num_bboxes, num_bboxes_for_nms, nms_overlap_thresh, decode_bboxes_dims, + bboxes, index, sorted_index, mask); +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cc new file mode 100755 index 0000000000..af8f23d694 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cc @@ -0,0 +1,208 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
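The NMS kernels above only fill a per-box suppression bitmask; a host-side pass still has to reduce that mask into the list of kept indices. A minimal sketch of that reduction, assuming boxes are pre-sorted by score and using the same DIVUP/THREADS_PER_BLOCK_NMS layout as the kernels (the function name SelectKeptBoxes is illustrative and not part of this patch):

#include <cstdint>
#include <vector>

// Sketch only, not part of this patch. mask has boxes_num * col_blocks entries;
// bit j of mask[i * col_blocks + b] means box (b * threads_per_block_nms + j)
// overlaps box i above the threshold.
std::vector<int> SelectKeptBoxes(const std::vector<unsigned long long> &mask,
                                 int boxes_num, int threads_per_block_nms) {
  const int col_blocks =
      (boxes_num + threads_per_block_nms - 1) / threads_per_block_nms;  // DIVUP
  std::vector<unsigned long long> remv(col_blocks, 0);  // accumulated suppressions
  std::vector<int> keep;
  for (int i = 0; i < boxes_num; ++i) {
    const int block = i / threads_per_block_nms;
    const int bit = i % threads_per_block_nms;
    if (remv[block] & (1ULL << bit)) {
      continue;  // already suppressed by a higher-scoring box
    }
    keep.push_back(i);
    // Box i survives, so every box it overlaps too strongly is suppressed.
    for (int b = block; b < col_blocks; ++b) {
      remv[b] |= mask[i * col_blocks + b];
    }
  }
  return keep;
}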
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +template +bool hard_voxelize_cpu_kernel( + const T *points, const float point_cloud_range_x_min, + const float point_cloud_range_y_min, const float point_cloud_range_z_min, + const float voxel_size_x, const float voxel_size_y, + const float voxel_size_z, const int grid_size_x, const int grid_size_y, + const int grid_size_z, const int64_t num_points, const int num_point_dim, + const int max_num_points_in_voxel, const int max_voxels, T *voxels, + T_int *coords, T_int *num_points_per_voxel, T_int *grid_idx_to_voxel_idx, + T_int *num_voxels) { + std::fill(voxels, + voxels + max_voxels * max_num_points_in_voxel * num_point_dim, + static_cast(0)); + + num_voxels[0] = 0; + int voxel_idx, grid_idx, curr_num_point; + int coord_x, coord_y, coord_z; + for (int point_idx = 0; point_idx < num_points; ++point_idx) { + coord_x = floor( + (points[point_idx * num_point_dim + 0] - point_cloud_range_x_min) / + voxel_size_x); + coord_y = floor( + (points[point_idx * num_point_dim + 1] - point_cloud_range_y_min) / + voxel_size_y); + coord_z = floor( + (points[point_idx * num_point_dim + 2] - point_cloud_range_z_min) / + voxel_size_z); + + if (coord_x < 0 || coord_x > grid_size_x || coord_x == grid_size_x) { + continue; + } + if (coord_y < 0 || coord_y > grid_size_y || coord_y == grid_size_y) { + continue; + } + if (coord_z < 0 || coord_z > grid_size_z || coord_z == grid_size_z) { + continue; + } + + grid_idx = + coord_z * grid_size_y * grid_size_x + coord_y * grid_size_x + coord_x; + voxel_idx = grid_idx_to_voxel_idx[grid_idx]; + if (voxel_idx == -1) { + voxel_idx = num_voxels[0]; + if (num_voxels[0] == max_voxels || num_voxels[0] > max_voxels) { + continue; + } + num_voxels[0]++; + grid_idx_to_voxel_idx[grid_idx] = voxel_idx; + coords[voxel_idx * 3 + 0] = coord_z; + coords[voxel_idx * 3 + 1] = coord_y; + coords[voxel_idx * 3 + 2] = coord_x; + } + curr_num_point = num_points_per_voxel[voxel_idx]; + if (curr_num_point < max_num_points_in_voxel) { + for (int j = 0; j < num_point_dim; ++j) { + voxels[voxel_idx * max_num_points_in_voxel * num_point_dim + + curr_num_point * num_point_dim + j] = + points[point_idx * num_point_dim + j]; + } + num_points_per_voxel[voxel_idx] = curr_num_point + 1; + } + } + return true; +} + +std::vector +hard_voxelize_cpu(const paddle::Tensor &points, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const int max_num_points_in_voxel, const int max_voxels) { + auto num_points = points.shape()[0]; + auto num_point_dim = points.shape()[1]; + + const float voxel_size_x = voxel_size[0]; + const float voxel_size_y = voxel_size[1]; + const float voxel_size_z = voxel_size[2]; + const float 
point_cloud_range_x_min = point_cloud_range[0]; + const float point_cloud_range_y_min = point_cloud_range[1]; + const float point_cloud_range_z_min = point_cloud_range[2]; + int grid_size_x = static_cast( + round((point_cloud_range[3] - point_cloud_range[0]) / voxel_size_x)); + int grid_size_y = static_cast( + round((point_cloud_range[4] - point_cloud_range[1]) / voxel_size_y)); + int grid_size_z = static_cast( + round((point_cloud_range[5] - point_cloud_range[2]) / voxel_size_z)); + + auto voxels = + paddle::empty({max_voxels, max_num_points_in_voxel, num_point_dim}, + paddle::DataType::FLOAT32, paddle::CPUPlace()); + + auto coords = paddle::full({max_voxels, 3}, 0, paddle::DataType::INT32, + paddle::CPUPlace()); + auto *coords_data = coords.data(); + + auto num_points_per_voxel = paddle::full( + {max_voxels}, 0, paddle::DataType::INT32, paddle::CPUPlace()); + auto *num_points_per_voxel_data = num_points_per_voxel.data(); + std::fill(num_points_per_voxel_data, + num_points_per_voxel_data + num_points_per_voxel.size(), + static_cast(0)); + + auto num_voxels = + paddle::full({1}, 0, paddle::DataType::INT32, paddle::CPUPlace()); + auto *num_voxels_data = num_voxels.data(); + + auto grid_idx_to_voxel_idx = + paddle::full({grid_size_z, grid_size_y, grid_size_x}, -1, + paddle::DataType::INT32, paddle::CPUPlace()); + auto *grid_idx_to_voxel_idx_data = grid_idx_to_voxel_idx.data(); + + PD_DISPATCH_FLOATING_TYPES( + points.type(), "hard_voxelize_cpu_kernel", ([&] { + hard_voxelize_cpu_kernel( + points.data(), point_cloud_range_x_min, + point_cloud_range_y_min, point_cloud_range_z_min, voxel_size_x, + voxel_size_y, voxel_size_z, grid_size_x, grid_size_y, grid_size_z, + num_points, num_point_dim, max_num_points_in_voxel, max_voxels, + voxels.data(), coords_data, num_points_per_voxel_data, + grid_idx_to_voxel_idx_data, num_voxels_data); + })); + + return {voxels, coords, num_points_per_voxel, num_voxels}; +} + +#if defined(PADDLE_WITH_CUDA) && defined(WITH_GPU) +std::vector +hard_voxelize_cuda(const paddle::Tensor &points, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + int max_num_points_in_voxel, int max_voxels); +#endif + +std::vector +hard_voxelize(const paddle::Tensor &points, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const int max_num_points_in_voxel, const int max_voxels) { + if (points.is_cpu()) { + return hard_voxelize_cpu(points, voxel_size, point_cloud_range, + max_num_points_in_voxel, max_voxels); +#if defined(PADDLE_WITH_CUDA) && defined(WITH_GPU) + } else if (points.is_gpu() || points.is_gpu_pinned()) { + return hard_voxelize_cuda(points, voxel_size, point_cloud_range, + max_num_points_in_voxel, max_voxels); +#endif + } else { + PD_THROW("Unsupported device type for hard_voxelize " + "operator."); + } +} + +std::vector> +HardInferShape(std::vector points_shape, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + const int &max_num_points_in_voxel, const int &max_voxels) { + return {{max_voxels, max_num_points_in_voxel, points_shape[1]}, + {max_voxels, 3}, + {max_voxels}, + {1}}; +} + +std::vector HardInferDtype(paddle::DataType points_dtype) { + return {points_dtype, paddle::DataType::INT32, paddle::DataType::INT32, + paddle::DataType::INT32}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer + +PD_BUILD_OP(hard_voxelize) + .Inputs({"POINTS"}) + .Outputs({"VOXELS", "COORS", "NUM_POINTS_PER_VOXEL", "num_voxels"}) + .SetKernelFn(PD_KERNEL(ultrainfer::paddle_custom_ops::hard_voxelize)) + 
.Attrs({"voxel_size: std::vector", + "point_cloud_range: std::vector", + "max_num_points_in_voxel: int", "max_voxels: int"}) + .SetInferShapeFn( + PD_INFER_SHAPE(ultrainfer::paddle_custom_ops::HardInferShape)) + .SetInferDtypeFn( + PD_INFER_DTYPE(ultrainfer::paddle_custom_ops::HardInferDtype)); diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cu b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cu new file mode 100755 index 0000000000..4ab363da0b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/ops/voxelize_op.cu @@ -0,0 +1,357 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x) +#include "paddle/include/experimental/ext_all.h" +#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x) +#include "paddle/include/paddle/extension.h" +#else +#include "paddle/extension.h" +#endif + +namespace ultrainfer { +namespace paddle_custom_ops { + +#define CHECK_INPUT_CUDA(x) \ + PD_CHECK(x.is_gpu() || x.is_gpu_pinned(), #x " must be a GPU Tensor.") + +#define CUDA_KERNEL_LOOP(i, n) \ + for (auto i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ + i += blockDim.x * gridDim.x) + +template +__global__ void init_num_point_grid( + const T *points, const float point_cloud_range_x_min, + const float point_cloud_range_y_min, const float point_cloud_range_z_min, + const float voxel_size_x, const float voxel_size_y, + const float voxel_size_z, const int grid_size_x, const int grid_size_y, + const int grid_size_z, const int64_t num_points, const int num_point_dim, + T_int *num_points_in_grid, int *points_valid) { + int64_t point_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (point_idx > num_points || point_idx == num_points) { + return; + } + int coord_x = + floor((points[point_idx * num_point_dim + 0] - point_cloud_range_x_min) / + voxel_size_x); + int coord_y = + floor((points[point_idx * num_point_dim + 1] - point_cloud_range_y_min) / + voxel_size_y); + int coord_z = + floor((points[point_idx * num_point_dim + 2] - point_cloud_range_z_min) / + voxel_size_z); + + if (coord_x < 0 || coord_x > grid_size_x || coord_x == grid_size_x) { + return; + } + if (coord_y < 0 || coord_y > grid_size_y || coord_y == grid_size_y) { + return; + } + if (coord_z < 0 || coord_z > grid_size_z || coord_z == grid_size_z) { + return; + } + + int grid_idx = + coord_z * grid_size_y * grid_size_x + coord_y * grid_size_x + coord_x; + num_points_in_grid[grid_idx] = 0; + points_valid[grid_idx] = num_points; +} + +template +__global__ void map_point_to_grid_kernel( + const T *points, const float point_cloud_range_x_min, + const float point_cloud_range_y_min, const float point_cloud_range_z_min, + const float voxel_size_x, const float voxel_size_y, + const float voxel_size_z, const int grid_size_x, const int grid_size_y, + const int grid_size_z, const int64_t num_points, const int num_point_dim, + const int max_num_points_in_voxel, T_int 
*points_to_grid_idx, + T_int *points_to_num_idx, T_int *num_points_in_grid, int *points_valid) { + int64_t point_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (point_idx > num_points || point_idx == num_points) { + return; + } + int coord_x = + floor((points[point_idx * num_point_dim + 0] - point_cloud_range_x_min) / + voxel_size_x); + int coord_y = + floor((points[point_idx * num_point_dim + 1] - point_cloud_range_y_min) / + voxel_size_y); + int coord_z = + floor((points[point_idx * num_point_dim + 2] - point_cloud_range_z_min) / + voxel_size_z); + + if (coord_x < 0 || coord_x > grid_size_x || coord_x == grid_size_x) { + return; + } + if (coord_y < 0 || coord_y > grid_size_y || coord_y == grid_size_y) { + return; + } + if (coord_z < 0 || coord_z > grid_size_z || coord_z == grid_size_z) { + return; + } + + int grid_idx = + coord_z * grid_size_y * grid_size_x + coord_y * grid_size_x + coord_x; + T_int num = atomicAdd(num_points_in_grid + grid_idx, 1); + if (num < max_num_points_in_voxel) { + points_to_num_idx[point_idx] = num; + points_to_grid_idx[point_idx] = grid_idx; + atomicMin(points_valid + grid_idx, static_cast(point_idx)); + } +} + +template +__global__ void update_points_flag(const int *points_valid, + const T_int *points_to_grid_idx, + const int num_points, int *points_flag) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (int i = tid; i < num_points; i += gridDim.x * blockDim.x) { + T_int grid_idx = points_to_grid_idx[i]; + if (grid_idx >= 0) { + int id = points_valid[grid_idx]; + if (id != num_points && id == i) { + points_flag[i] = 1; + } + } + } +} + +template +__global__ void +get_voxel_idx_kernel(const int *points_flag, const T_int *points_to_grid_idx, + const int *points_flag_prefix_sum, const int num_points, + const int max_voxels, T_int *num_voxels, + T_int *grid_idx_to_voxel_idx) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (int i = tid; i < num_points; i += gridDim.x * blockDim.x) { + if (points_flag[i] == 1) { + T_int grid_idx = points_to_grid_idx[i]; + int num = points_flag_prefix_sum[i]; + if (num < max_voxels) { + grid_idx_to_voxel_idx[grid_idx] = num; + } + } + if (i == num_points - 1) { + int num = points_flag_prefix_sum[i] + points_flag[i]; + if (num < max_voxels) { + num_voxels[0] = num; + } else { + num_voxels[0] = max_voxels; + } + } + } +} + +template +__global__ void init_voxels_kernel(const int64_t num, T *voxels) { + int64_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx > num || idx == num) { + return; + } + voxels[idx] = static_cast(0); +} + +template +__global__ void +assign_voxels_kernel(const T *points, const T_int *points_to_grid_idx, + const T_int *points_to_num_idx, + const T_int *grid_idx_to_voxel_idx, + const int64_t num_points, const int num_point_dim, + const int max_num_points_in_voxel, T *voxels) { + int64_t point_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (point_idx > num_points || point_idx == num_points) { + return; + } + T_int grid_idx = points_to_grid_idx[point_idx]; + T_int num_idx = points_to_num_idx[point_idx]; + if (grid_idx > -1 && num_idx > -1) { + T_int voxel_idx = grid_idx_to_voxel_idx[grid_idx]; + if (voxel_idx > -1) { + for (int64_t i = 0; i < num_point_dim; ++i) { + voxels[voxel_idx * max_num_points_in_voxel * num_point_dim + + num_idx * num_point_dim + i] = + points[point_idx * num_point_dim + i]; + } + } + } +} + +template +__global__ void +assign_coords_kernel(const T_int *grid_idx_to_voxel_idx, + const T_int *num_points_in_grid, const int num_grids, + const int grid_size_x, const 
int grid_size_y, + const int grid_size_z, const int max_num_points_in_voxel, + T *coords, T *num_points_per_voxel) { + int64_t grid_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (grid_idx > num_grids || grid_idx == num_grids) { + return; + } + T_int voxel_idx = grid_idx_to_voxel_idx[grid_idx]; + if (voxel_idx > -1) { + T_int coord_z = grid_idx / grid_size_x / grid_size_y; + T_int coord_y = + (grid_idx - coord_z * grid_size_x * grid_size_y) / grid_size_x; + T_int coord_x = + grid_idx - coord_z * grid_size_x * grid_size_y - coord_y * grid_size_x; + coords[voxel_idx * 3 + 0] = coord_z; + coords[voxel_idx * 3 + 1] = coord_y; + coords[voxel_idx * 3 + 2] = coord_x; + num_points_per_voxel[voxel_idx] = + min(num_points_in_grid[grid_idx], max_num_points_in_voxel); + } +} + +std::vector +hard_voxelize_cuda(const paddle::Tensor &points, + const std::vector &voxel_size, + const std::vector &point_cloud_range, + int max_num_points_in_voxel, int max_voxels) { + // check device + CHECK_INPUT_CUDA(points); + + int64_t num_points = points.shape()[0]; + int64_t num_point_dim = points.shape()[1]; + + const float voxel_size_x = voxel_size[0]; + const float voxel_size_y = voxel_size[1]; + const float voxel_size_z = voxel_size[2]; + const float point_cloud_range_x_min = point_cloud_range[0]; + const float point_cloud_range_y_min = point_cloud_range[1]; + const float point_cloud_range_z_min = point_cloud_range[2]; + int grid_size_x = static_cast( + round((point_cloud_range[3] - point_cloud_range[0]) / voxel_size_x)); + int grid_size_y = static_cast( + round((point_cloud_range[4] - point_cloud_range[1]) / voxel_size_y)); + int grid_size_z = static_cast( + round((point_cloud_range[5] - point_cloud_range[2]) / voxel_size_z)); + int num_grids = grid_size_x * grid_size_y * grid_size_z; + + auto voxels = + paddle::empty({max_voxels, max_num_points_in_voxel, num_point_dim}, + paddle::DataType::FLOAT32, paddle::GPUPlace()); + + auto coords = paddle::full({max_voxels, 3}, 0, paddle::DataType::INT32, + paddle::GPUPlace()); + auto *coords_data = coords.data(); + + auto num_points_per_voxel = paddle::full( + {max_voxels}, 0, paddle::DataType::INT32, paddle::GPUPlace()); + auto *num_points_per_voxel_data = num_points_per_voxel.data(); + + auto points_to_grid_idx = paddle::full( + {num_points}, -1, paddle::DataType::INT32, paddle::GPUPlace()); + auto *points_to_grid_idx_data = points_to_grid_idx.data(); + + auto points_to_num_idx = paddle::full( + {num_points}, -1, paddle::DataType::INT32, paddle::GPUPlace()); + auto *points_to_num_idx_data = points_to_num_idx.data(); + + auto num_points_in_grid = + paddle::empty({grid_size_z, grid_size_y, grid_size_x}, + paddle::DataType::INT32, paddle::GPUPlace()); + auto *num_points_in_grid_data = num_points_in_grid.data(); + + auto grid_idx_to_voxel_idx = + paddle::full({grid_size_z, grid_size_y, grid_size_x}, -1, + paddle::DataType::INT32, paddle::GPUPlace()); + auto *grid_idx_to_voxel_idx_data = grid_idx_to_voxel_idx.data(); + + auto num_voxels = + paddle::full({1}, 0, paddle::DataType::INT32, paddle::GPUPlace()); + auto *num_voxels_data = num_voxels.data(); + + auto points_valid = + paddle::empty({grid_size_z, grid_size_y, grid_size_x}, + paddle::DataType::INT32, paddle::GPUPlace()); + int *points_valid_data = points_valid.data(); + auto points_flag = paddle::full({num_points}, 0, paddle::DataType::INT32, + paddle::GPUPlace()); + + // 1. 
Find the grid index for each point, compute the + // number of points in each grid + int64_t threads = 512; + int64_t blocks = (num_points + threads - 1) / threads; + + PD_DISPATCH_FLOATING_TYPES( + points.type(), "init_num_point_grid", ([&] { + init_num_point_grid + <<>>( + points.data(), point_cloud_range_x_min, + point_cloud_range_y_min, point_cloud_range_z_min, voxel_size_x, + voxel_size_y, voxel_size_z, grid_size_x, grid_size_y, + grid_size_z, num_points, num_point_dim, num_points_in_grid_data, + points_valid_data); + })); + + PD_DISPATCH_FLOATING_TYPES( + points.type(), "map_point_to_grid_kernel", ([&] { + map_point_to_grid_kernel + <<>>( + points.data(), point_cloud_range_x_min, + point_cloud_range_y_min, point_cloud_range_z_min, voxel_size_x, + voxel_size_y, voxel_size_z, grid_size_x, grid_size_y, + grid_size_z, num_points, num_point_dim, max_num_points_in_voxel, + points_to_grid_idx_data, points_to_num_idx_data, + num_points_in_grid_data, points_valid_data); + })); + + // 2. Find the number of non-zero voxels + int *points_flag_data = points_flag.data(); + + threads = 512; + blocks = (num_points + threads - 1) / threads; + update_points_flag<<>>( + points_valid_data, points_to_grid_idx_data, num_points, points_flag_data); + + auto points_flag_prefix_sum = + paddle::experimental::cumsum(points_flag, 0, false, true, false); + int *points_flag_prefix_sum_data = points_flag_prefix_sum.data(); + get_voxel_idx_kernel<<>>( + points_flag_data, points_to_grid_idx_data, points_flag_prefix_sum_data, + num_points, max_voxels, num_voxels_data, grid_idx_to_voxel_idx_data); + + // 3. Store points to voxels coords and num_points_per_voxel + int64_t num = max_voxels * max_num_points_in_voxel * num_point_dim; + threads = 512; + blocks = (num + threads - 1) / threads; + PD_DISPATCH_FLOATING_TYPES(points.type(), "init_voxels_kernel", ([&] { + init_voxels_kernel + <<>>( + num, voxels.data()); + })); + + threads = 512; + blocks = (num_points + threads - 1) / threads; + PD_DISPATCH_FLOATING_TYPES( + points.type(), "assign_voxels_kernel", ([&] { + assign_voxels_kernel + <<>>( + points.data(), points_to_grid_idx_data, + points_to_num_idx_data, grid_idx_to_voxel_idx_data, num_points, + num_point_dim, max_num_points_in_voxel, voxels.data()); + })); + + // 4. Store coords, num_points_per_voxel + blocks = (num_grids + threads - 1) / threads; + assign_coords_kernel<<>>( + grid_idx_to_voxel_idx_data, num_points_in_grid_data, num_grids, + grid_size_x, grid_size_y, grid_size_z, max_num_points_in_voxel, + coords_data, num_points_per_voxel_data); + + return {voxels, coords, num_points_per_voxel, num_voxels}; +} + +} // namespace paddle_custom_ops +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option.h new file mode 100755 index 0000000000..d57a888782 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option.h @@ -0,0 +1,169 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
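Both the CPU and CUDA voxelizers above derive the grid dimensions as round((range_max - range_min) / voxel_size) per axis. A small self-contained sketch with illustrative PointPillars-style numbers (none of these values come from this patch) shows the sizes involved:

#include <cmath>
#include <cstdio>

int main() {
  // Illustrative values only: [x_min, y_min, z_min, x_max, y_max, z_max].
  const float point_cloud_range[6] = {0.f, -39.68f, -3.f, 69.12f, 39.68f, 1.f};
  const float voxel_size[3] = {0.16f, 0.16f, 4.f};
  const int grid_x = static_cast<int>(
      std::round((point_cloud_range[3] - point_cloud_range[0]) / voxel_size[0]));  // 432
  const int grid_y = static_cast<int>(
      std::round((point_cloud_range[4] - point_cloud_range[1]) / voxel_size[1]));  // 496
  const int grid_z = static_cast<int>(
      std::round((point_cloud_range[5] - point_cloud_range[2]) / voxel_size[2]));  // 1
  // grid_idx_to_voxel_idx is allocated over the full grid, so a coarse z axis
  // keeps the (1 x 496 x 432) INT32 lookup table small.
  std::printf("grid = %d x %d x %d (%d cells)\n", grid_x, grid_y, grid_z,
              grid_x * grid_y * grid_z);
  return 0;
}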
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_type.h" +#include "ultrainfer/runtime/backends/tensorrt/option.h" +#include +#include +#include +#include + +namespace ultrainfer { + +/*! @brief Option object to configure GraphCore IPU + */ +struct IpuOption { + /// IPU device id + int ipu_device_num; + /// the batch size in the graph, only work when graph has no batch shape info + int ipu_micro_batch_size; + /// enable pipelining + bool ipu_enable_pipelining; + /// the number of batches per run in pipelining + int ipu_batches_per_step; + /// enable fp16 + bool ipu_enable_fp16; + /// the number of graph replication + int ipu_replica_num; + /// the available memory proportion for matmul/conv + float ipu_available_memory_proportion; + /// enable fp16 partial for matmul, only work with fp16 + bool ipu_enable_half_partial; +}; + +/*! @brief Option object to configure KUNLUNXIN XPU + */ +struct XpuOption { + /// kunlunxin device id + int kunlunxin_device_id = 0; + /// EnableXpu + /// kunlunxin_l3_workspace_size + int kunlunxin_l3_workspace_size = 0xfffc00; + /// kunlunxin_locked + bool kunlunxin_locked = false; + /// kunlunxin_autotune + bool kunlunxin_autotune = true; + /// kunlunxin_autotune_file + std::string kunlunxin_autotune_file = ""; + /// kunlunxin_precision + std::string kunlunxin_precision = "int16"; + /// kunlunxin_adaptive_seqlen + bool kunlunxin_adaptive_seqlen = false; + /// kunlunxin_enable_multi_stream + bool kunlunxin_enable_multi_stream = false; + /// SetXpuConfig + /// quant post dynamic weight bits + int kunlunxin_quant_post_dynamic_weight_bits = -1; + /// quant post dynamic op types + std::vector kunlunxin_quant_post_dynamic_op_types = {}; +}; + +/*! 
@brief Option object to configure Paddle Inference backend + */ +struct PaddleBackendOption { + /// Print log information while initialize Paddle Inference backend + bool enable_log_info = false; + /// Enable MKLDNN while inference on CPU + bool enable_mkldnn = true; + /// Use Paddle Inference + TensorRT to inference model on GPU + bool enable_trt = false; + /// Whether enable memory optimize, default true + bool enable_memory_optimize = true; + /// Whether enable ir debug, default false + bool switch_ir_debug = false; + /// Whether enable ir optimize, default true + bool switch_ir_optimize = true; + /// Whether the load model is quantized model + bool is_quantize_model = false; + std::string inference_precision = "float32"; + bool enable_inference_cutlass = false; + + /* + * @brief IPU option, this will configure the IPU hardware, if inference model + * in IPU + */ + IpuOption ipu_option; + /* + * @brief XPU option, this will configure the KUNLUNXIN XPU hardware, if + * inference model in XPU + */ + XpuOption xpu_option; + + /// Collect shape for model while enable_trt is true + bool collect_trt_shape = false; + /// Collect shape for model by device (for some custom ops) + bool collect_trt_shape_by_device = false; + /// Cache input shape for mkldnn while the input data will change dynamiclly + int mkldnn_cache_size = -1; + /// initialize memory size(MB) for GPU + int gpu_mem_init_size = 100; + /// The option to enable fixed size optimization for transformer model + bool enable_fixed_size_opt = false; + /// min_subgraph_size for paddle-trt + int trt_min_subgraph_size = 3; + +#if PADDLEINFERENCE_VERSION_MAJOR == 2 + bool enable_new_ir = false; +#else + bool enable_new_ir = true; +#endif + + /// Disable type of operators run on TensorRT + void DisableTrtOps(const std::vector &ops) { + trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end()); + } + + /// Delete pass by name + void DeletePass(const std::string &pass_name) { + delete_pass_names.push_back(pass_name); + } + + void SetIpuConfig(bool enable_fp16, int replica_num, + float available_memory_proportion, + bool enable_half_partial) { + ipu_option.ipu_enable_fp16 = enable_fp16; + ipu_option.ipu_replica_num = replica_num; + ipu_option.ipu_available_memory_proportion = available_memory_proportion; + ipu_option.ipu_enable_half_partial = enable_half_partial; + } + + void SetXpuConfig( + int quant_post_dynamic_weight_bits = -1, + const std::vector &quant_post_dynamic_op_types = {}) { + xpu_option.kunlunxin_quant_post_dynamic_weight_bits = + quant_post_dynamic_weight_bits; + xpu_option.kunlunxin_quant_post_dynamic_op_types = + quant_post_dynamic_op_types; + } + + // The belowing parameters may be removed, please do not + // read or write them directly + TrtBackendOption trt_option; + bool enable_pinned_memory = false; + void *external_stream_ = nullptr; + Device device = Device::CPU; + /// device id for CPU/GPU + int device_id = 0; + std::vector trt_disabled_ops_{}; + int cpu_thread_num = 8; + std::vector delete_pass_names = {}; + std::string model_file = ""; // Path of model file + std::string params_file = ""; // Path of parameters file, can be empty + + // load model and paramters from memory + bool model_from_memory_ = false; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option_pybind.cc new file mode 100755 index 0000000000..0bd104470a --- /dev/null +++ 
b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/option_pybind.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" +#include "ultrainfer/runtime/backends/paddle/option.h" + +namespace ultrainfer { + +void BindIpuOption(pybind11::module &m) { + pybind11::class_(m, "IpuOption") + .def(pybind11::init()) + .def_readwrite("ipu_device_num", &IpuOption::ipu_device_num) + .def_readwrite("ipu_micro_batch_size", &IpuOption::ipu_micro_batch_size) + .def_readwrite("ipu_enable_pipelining", &IpuOption::ipu_enable_pipelining) + .def_readwrite("ipu_batches_per_step", &IpuOption::ipu_batches_per_step) + .def_readwrite("ipu_enable_fp16", &IpuOption::ipu_enable_fp16) + .def_readwrite("ipu_replica_num", &IpuOption::ipu_replica_num) + .def_readwrite("ipu_available_memory_proportion", + &IpuOption::ipu_available_memory_proportion) + .def_readwrite("ipu_enable_half_partial", + &IpuOption::ipu_enable_half_partial); +} + +void BindPaddleOption(pybind11::module &m) { + BindIpuOption(m); + pybind11::class_(m, "PaddleBackendOption") + .def(pybind11::init()) + .def_readwrite("enable_fixed_size_opt", + &PaddleBackendOption::enable_fixed_size_opt) + .def_readwrite("enable_log_info", &PaddleBackendOption::enable_log_info) + .def_readwrite("enable_mkldnn", &PaddleBackendOption::enable_mkldnn) + .def_readwrite("enable_trt", &PaddleBackendOption::enable_trt) + .def_readwrite("enable_memory_optimize", + &PaddleBackendOption::enable_memory_optimize) + .def_readwrite("switch_ir_debug", &PaddleBackendOption::switch_ir_debug) + .def_readwrite("ipu_option", &PaddleBackendOption::ipu_option) + .def_readwrite("xpu_option", &PaddleBackendOption::xpu_option) + .def_readwrite("trt_option", &PaddleBackendOption::trt_option) + .def_readwrite("collect_trt_shape", + &PaddleBackendOption::collect_trt_shape) + .def_readwrite("collect_trt_shape_by_device", + &PaddleBackendOption::collect_trt_shape_by_device) + .def_readwrite("mkldnn_cache_size", + &PaddleBackendOption::mkldnn_cache_size) + .def_readwrite("gpu_mem_init_size", + &PaddleBackendOption::gpu_mem_init_size) + .def_readwrite("is_quantize_model", + &PaddleBackendOption::is_quantize_model) + .def_readwrite("inference_precision", + &PaddleBackendOption::inference_precision) + .def_readwrite("enable_inference_cutlass", + &PaddleBackendOption::enable_inference_cutlass) + .def_readwrite("trt_min_subgraph_size", + &PaddleBackendOption::trt_min_subgraph_size) + .def_readwrite("enable_new_ir", &PaddleBackendOption::enable_new_ir) + .def("disable_trt_ops", &PaddleBackendOption::DisableTrtOps) + .def("delete_pass", &PaddleBackendOption::DeletePass) + .def("set_ipu_config", &PaddleBackendOption::SetIpuConfig); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.cc new file mode 100755 index 0000000000..3ee46c43f5 --- /dev/null +++ 
b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.cc @@ -0,0 +1,650 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/runtime/backends/paddle/paddle_backend.h" + +#include + +#include "ultrainfer/utils/path.h" + +namespace ultrainfer { + +void PaddleBackend::BuildOption(const PaddleBackendOption &option) { + option_ = option; + if (option.device == Device::GPU) { + auto inference_precision = paddle_infer::PrecisionType::kFloat32; + if (option_.inference_precision == "float32") { + FDINFO << "Will inference_precision float32" << std::endl; + inference_precision = paddle_infer::PrecisionType::kFloat32; + } else if (option_.inference_precision == "float16") { + FDINFO << "Will inference_precision float16" << std::endl; + inference_precision = paddle_infer::PrecisionType::kHalf; + } else if (option_.inference_precision == "bfloat16") { + FDINFO << "Will inference_precision bfloat16" << std::endl; + inference_precision = paddle_infer::PrecisionType::kBf16; + } else if (option_.inference_precision == "int8") { + FDINFO << "Will inference_precision int8" << std::endl; + inference_precision = paddle_infer::PrecisionType::kInt8; + } else { + FDERROR << "paddle inference only support precision in float32," + << " float16, bfloat16 and int8" << std::endl; + } + config_.Exp_DisableMixedPrecisionOps({"feed", "fetch"}); + config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id, + inference_precision); + // config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id); + if (option_.switch_ir_debug) { + FDINFO << "Will Enable ir_debug for Paddle Backend." << std::endl; + config_.SwitchIrDebug(); + } + if (option_.enable_inference_cutlass) { +#ifdef PADDLEINFERENCE_API_COMPAT_2_4_x + FDWARNING + << "Your are using Paddle infernence 2.4.x, cutlass is not supported!" + << std::endl; +#else + FDINFO << "Will enable_inference_cutlass" << std::endl; + config_.Exp_EnableUseCutlass(); +#endif + } + if (option_.external_stream_) { + FDINFO << "Will use external stream for Paddle Backend." << std::endl; + config_.SetExecStream(option_.external_stream_); + } + if (option.enable_trt) { + if (!option.trt_option.enable_fp16) { + FDINFO << "Will try to use tensorrt inference with Paddle Backend." + << std::endl; + } + config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_); + auto precision = paddle_infer::PrecisionType::kFloat32; + if (option.trt_option.enable_fp16) { + FDINFO << "Will try to use tensorrt fp16 inference with Paddle Backend." + << std::endl; + precision = paddle_infer::PrecisionType::kHalf; + } + bool use_static = false; + if (option.trt_option.serialize_file != "") { + FDWARNING + << "Detect that tensorrt cache file has been set to " + << option.trt_option.serialize_file + << ", but while enable paddle2trt, please notice that the cache " + "file will save to the directory where paddle model saved." 
+ << std::endl; + use_static = true; + std::string opt_cache_dir = + GetDirFromPath(option.trt_option.serialize_file); + + config_.SetOptimCacheDir(opt_cache_dir); + } + config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, + option.trt_option.max_batch_size, + option.trt_min_subgraph_size, precision, + use_static); + + SetTRTDynamicShapeToConfig(option); + if (option_.enable_fixed_size_opt) { + paddle_infer::experimental::InternalUtils::SetTransformerMaskid( + &config_, "opt"); + } + } + } else if (option.device == Device::IPU) { +#ifdef WITH_IPU + config_.EnableIpu(option.ipu_option.ipu_device_num, + option.ipu_option.ipu_micro_batch_size, + option.ipu_option.ipu_enable_pipelining, + option.ipu_option.ipu_batches_per_step); + config_.SetIpuConfig(option.ipu_option.ipu_enable_fp16, + option.ipu_option.ipu_replica_num, + option.ipu_option.ipu_available_memory_proportion, + option.ipu_option.ipu_enable_half_partial); +#else + FDWARNING << "The UltraInfer is not compiled with IPU device, so will " + "fallback to CPU with Paddle Inference Backend." + << std::endl; +#endif + } else if (option.device == Device::KUNLUNXIN) { +#ifdef WITH_KUNLUNXIN + // Note(qiuyanjun): For Paddle XPU L3 Cache, please set + // export XPU_PADDLE_L3_SIZE=67104768 (XPU R200) + // export FLAGS_fuse_multi_transformer_quant_type="float" + config_.EnableXpu(option.xpu_option.kunlunxin_l3_workspace_size, + option.xpu_option.kunlunxin_locked, + option.xpu_option.kunlunxin_autotune, + option.xpu_option.kunlunxin_autotune_file, + option.xpu_option.kunlunxin_precision, + option.xpu_option.kunlunxin_adaptive_seqlen, + option.xpu_option.kunlunxin_enable_multi_stream); + config_.SetXpuConfig( + option.xpu_option.kunlunxin_quant_post_dynamic_weight_bits, + option.xpu_option.kunlunxin_quant_post_dynamic_op_types); + config_.SetXpuDeviceId(option.xpu_option.kunlunxin_device_id); +#else + FDWARNING + << "The UltraInfer is not compiled with KUNLUNXIN device, so will " + "fallback to CPU with Paddle Inference Backend." + << std::endl; +#endif + } else { + config_.DisableGpu(); + if (option.enable_mkldnn) { + config_.EnableMKLDNN(); + config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size); + } else { +#if defined(PADDLEINFERENCE_API_COMPAT_2_6_x) || \ + (PADDLEINFERENCE_VERSION_MAJOR != 2) + config_.DisableMKLDNN(); +#endif + } + } + + if (!option.enable_log_info) { + config_.DisableGlogInfo(); + } + if (option.cpu_thread_num <= 0) { + config_.SetCpuMathLibraryNumThreads(8); + } else { + config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num); + } + // Note: SwitchIrOptim is enabled by default for paddle inference + // backend. So, we don't need to set it manually. + // config_.SwitchIrOptim(option.switch_ir_optimize); + + if (option.enable_new_ir) { +#if PADDLEINFERENCE_VERSION_MAJOR == 2 + FDWARNING << "UltraInfer was compiled with Paddle Inference v2.0+ " + "which does not support the new IR." + << std::endl; +#else + if (option.device == Device::GPU && option.enable_trt) { + FDWARNING << "Currently, Paddle-TensorRT does not support the new IR, " + "and the old IR will be used." 
+ << std::endl; + } else { + config_.EnableNewIR(); + config_.EnableNewExecutor(); + if (option.device == Device::CPU || option.device == Device::GPU) { + config_.SetOptimizationLevel(3); + } + } +#endif + } +} + +bool PaddleBackend::Init(const RuntimeOption &runtime_option) { + if (!(Supported(runtime_option.model_format, Backend::PDINFER) && + Supported(runtime_option.device, Backend::PDINFER))) { + return false; + } + + auto option = runtime_option; + // Collect basic paddle inference option and trt option. + option.paddle_infer_option.model_file = runtime_option.model_file; + option.paddle_infer_option.params_file = runtime_option.params_file; + option.paddle_infer_option.model_from_memory_ = + runtime_option.model_from_memory_; + option.paddle_infer_option.device = runtime_option.device; + option.paddle_infer_option.device_id = runtime_option.device_id; + option.paddle_infer_option.enable_pinned_memory = + runtime_option.enable_pinned_memory; + option.paddle_infer_option.external_stream_ = runtime_option.external_stream_; + option.paddle_infer_option.trt_option = runtime_option.trt_option; + option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id; + // Note(qiuyanjun): For Ipu option and XPU option, please check the + // details of RuntimeOption::UseIpu() and RuntimeOption::UseKunlunXin(). + // Futhermore, please check paddle_infer_option.SetIpuConfig() and + // paddle_infer_option.SetXpuConfig() for more details of extra configs. + return InitFromPaddle(option.model_file, option.params_file, + option.model_from_memory_, option.paddle_infer_option); +} + +bool PaddleBackend::InitFromPaddle(const std::string &model, + const std::string ¶ms, + bool model_from_memory, + const PaddleBackendOption &option) { + if (initialized_) { + FDERROR << "PaddleBackend is already initlized, cannot initialize again." + << std::endl; + return false; + } + if (model_from_memory) { + config_.SetModelBuffer(model.c_str(), model.size(), params.c_str(), + params.size()); + } else { + config_.SetModel(model, params); + } + if (option.enable_memory_optimize) { + config_.EnableMemoryOptim(); + } + BuildOption(option); + // The input/output information get from predictor is not right, use + // PaddleReader instead now + std::string model_content = model; + if (!model_from_memory) { + FDASSERT(ReadBinaryFromFile(model, &model_content), + "Failed to read file %s.", model.c_str()); + } + + if (option.is_quantize_model) { + if (option.device == Device::GPU) { + FDWARNING << "The loaded model is a quantized model, while inference on " + "GPU, please use TensorRT backend to get better performance." + << std::endl; + if (option.enable_trt) { + bool use_static = false; + if (option.trt_option.serialize_file != "") { + FDWARNING + << "Detect that tensorrt cache file has been set to " + << option.trt_option.serialize_file + << ", but while enable paddle2trt, please notice that the cache " + "file will save to the directory where paddle model saved." 
+ << std::endl; + use_static = true; + } +#if PADDLEINFERENCE_VERSION_MAJOR != 2 + config_.EnableTensorRtEngine( + option.trt_option.max_workspace_size, + option.trt_option.max_batch_size, option.trt_min_subgraph_size, + paddle_infer::PrecisionType::kInt8, use_static, false, true); +#else + config_.EnableTensorRtEngine( + option.trt_option.max_workspace_size, + option.trt_option.max_batch_size, option.trt_min_subgraph_size, + paddle_infer::PrecisionType::kInt8, use_static, false); +#endif + SetTRTDynamicShapeToConfig(option); + } + } + if (option.enable_mkldnn) { + config_.EnableMkldnnInt8(); + } else { + FDWARNING << "The loaded model is a quantized model, while inference on " + "CPU, please enable MKLDNN to get better performance." + << std::endl; + } + } + if (option.collect_trt_shape) { + // Set the shape info file. + std::string curr_model_dir = "./"; + if (!option.model_from_memory_) { + curr_model_dir = GetDirFromPath(option.model_file); + } + std::string shape_range_info = + PathJoin(curr_model_dir, "shape_range_info.pbtxt"); + if (!CheckFileExists(shape_range_info)) { + FDINFO << "Start generating shape range info file." << std::endl; + paddle_infer::Config analysis_config; + if (model_from_memory) { + analysis_config.SetModelBuffer(model.c_str(), model.size(), + params.c_str(), params.size()); + } else { + analysis_config.SetModel(model, params); + } + if (option.collect_trt_shape_by_device) { + if (option.device == Device::GPU) { + analysis_config.EnableUseGpu(option.gpu_mem_init_size, + option.device_id, + paddle_infer::PrecisionType::kFloat32); + } + } + analysis_config.CollectShapeRangeInfo(shape_range_info); + auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config); + std::map> max_shape; + std::map> min_shape; + std::map> opt_shape; + GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape); + std::map> max_input_data; + std::map> min_input_data; + std::map> opt_input_data; + if (!option.trt_option.min_input_data.empty()) { + GetInputDataFromOption(option, &max_input_data, &min_input_data, + &opt_input_data); + } + // Need to run once to get the shape range info file. + CollectShapeRun(predictor_tmp.get(), max_shape, max_input_data); + CollectShapeRun(predictor_tmp.get(), min_shape, min_input_data); + CollectShapeRun(predictor_tmp.get(), opt_shape, opt_input_data); + CollectShapeRun(predictor_tmp.get(), opt_shape, opt_input_data); + FDINFO << "Finish generating shape range info file." << std::endl; + } + FDINFO << "Start loading shape range info file " << shape_range_info + << " to set TensorRT dynamic shape." << std::endl; + config_.EnableTunedTensorRtDynamicShape(shape_range_info, true); + } + // Note(zhoushunjie): The pass deletion should be executed just before + // creating predictor. 
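  // As a usage sketch (not part of this patch): a caller typically reaches this
  // branch by registering pass names on the option object before Init(), e.g.
  //
  //   RuntimeOption runtime_option;
  //   runtime_option.paddle_infer_option.DeletePass("fc_fuse_pass");
  //
  // The pass name above is illustrative only; DeletePass() simply appends to
  // delete_pass_names (see option.h), and the loop below removes each entry
  // from the pass builder right before paddle_infer::CreatePredictor(config_).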
+ if (!option.delete_pass_names.empty()) { + auto pass_builder = config_.pass_builder(); + for (int i = 0; i < option.delete_pass_names.size(); i++) { + FDINFO << "Delete pass : " << option.delete_pass_names[i] << std::endl; + pass_builder->DeletePass(option.delete_pass_names[i]); + } + } + if (option.enable_log_info) { + FDINFO << "Finish paddle inference config with summary as: " << std::endl + << config_.Summary() << std::endl; + } + predictor_ = paddle_infer::CreatePredictor(config_); + auto input_names = predictor_->GetInputNames(); + auto output_names = predictor_->GetOutputNames(); + auto input_dtypes = predictor_->GetInputTypes(); + +#ifdef PADDLEINFERENCE_API_COMPAT_2_4_x + // Note: GetInputTensorShape, GetOutputTensorShape and GetOutputTypes + // are not supported when Paddle Inference API version is 2.4.x. + std::map> input_shapes; + std::map> output_shapes; + std::map output_dtypes; + // Get the all the input shape info. + for (size_t i = 0; i < input_names.size(); ++i) { + std::vector shape; + auto handle = predictor_->GetInputHandle(input_names[i]); + for (int j = 0; j < handle->shape().size(); ++j) { + shape.push_back( + static_cast(handle->shape()[j])); // int32 -> int64 + } + input_shapes[input_names[i]] = shape; + } + // Get the all the output shape and dtype info. + for (size_t i = 0; i < output_names.size(); ++i) { + std::vector shape; + auto handle = predictor_->GetOutputHandle(output_names[i]); + for (int j = 0; j < handle->shape().size(); ++j) { + shape.push_back( + static_cast(handle->shape()[j])); // int32 -> int64 + } + output_shapes[output_names[i]] = shape; + output_dtypes[output_names[i]] = handle->type(); + } +#else + auto input_shapes = predictor_->GetInputTensorShape(); + auto output_shapes = predictor_->GetOutputTensorShape(); + auto output_dtypes = predictor_->GetOutputTypes(); +#endif + + inputs_desc_.resize(input_names.size()); + for (int i = 0; i < input_names.size(); ++i) { + inputs_desc_[i].name = input_names[i]; + auto iter = input_shapes.find(inputs_desc_[i].name); + FDASSERT(iter != input_shapes.end(), "Cannot find shape for input %s.", + inputs_desc_[i].name.c_str()); + inputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end()); + auto iter1 = input_dtypes.find(inputs_desc_[i].name); + FDASSERT(iter1 != input_dtypes.end(), "Cannot find data type for input %s.", + inputs_desc_[i].name.c_str()); + inputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second); + } + outputs_desc_.resize(output_names.size()); + for (int i = 0; i < output_names.size(); ++i) { + outputs_desc_[i].name = output_names[i]; + auto iter = output_shapes.find(outputs_desc_[i].name); + FDASSERT(iter != output_shapes.end(), "Cannot find shape for output %s.", + outputs_desc_[i].name.c_str()); + outputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end()); + auto iter1 = output_dtypes.find(outputs_desc_[i].name); + FDASSERT(iter1 != output_dtypes.end(), + "Cannot find data type for output %s.", + outputs_desc_[i].name.c_str()); + outputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second); + } + + initialized_ = true; + return true; +} + +TensorInfo PaddleBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()); + return inputs_desc_[index]; +} + +std::vector PaddleBackend::GetInputInfos() { return inputs_desc_; } + +TensorInfo PaddleBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs %d.", index, + 
NumOutputs()); + return outputs_desc_[index]; +} + +std::vector PaddleBackend::GetOutputInfos() { + return outputs_desc_; +} + +bool PaddleBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + if (inputs.size() != inputs_desc_.size()) { + FDERROR << "[PaddleBackend] Size of inputs(" << inputs.size() + << ") should keep same with the inputs of this model(" + << inputs_desc_.size() << ")." << std::endl; + return false; + } + // output share backend memory only support CPU or GPU + if (option_.device == Device::IPU) { + copy_to_fd = true; + } + + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + for (size_t i = 0; i < inputs.size(); ++i) { + auto handle = predictor_->GetInputHandle(inputs[i].name); + ShareTensorFromFDTensor(handle.get(), inputs[i]); + } + // prebinded output only support for GPU + // if (!copy_to_fd) { + // for (size_t i = 0; i < (*outputs).size(); ++i) { + // auto output_name = (*outputs)[i].name; + // // if a output is not prebinded, + // // the name of output is expected to be empty. + // // We skip here + // if (output_name.empty()) { + // continue; + // } + // // Record the prebinded output_name. + // // Those outputs do not need PaddleTensorToFDTensor + // // after predictor_.Run() + // auto handle = predictor_->GetOutputHandle(output_name); + // ShareOutTensorFromFDTensor(handle.get(), (*outputs)[i]); + // } + // } + + RUNTIME_PROFILE_LOOP_BEGIN(1) + predictor_->Run(); + RUNTIME_PROFILE_LOOP_END + + outputs->resize(outputs_desc_.size()); + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name); + if (copy_to_fd) { + (*outputs)[i].is_pinned_memory = option_.enable_pinned_memory; + } + PaddleTensorToFDTensor(handle, &((*outputs)[i]), copy_to_fd); + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + return true; +} + +std::unique_ptr PaddleBackend::Clone(RuntimeOption &runtime_option, + void *stream, int device_id) { + std::unique_ptr new_backend = + utils::make_unique(); + auto casted_backend = dynamic_cast(new_backend.get()); + if (device_id > 0 && (option_.device == Device::GPU) && + device_id != option_.device_id) { + auto clone_option = option_; + clone_option.device_id = device_id; + clone_option.external_stream_ = stream; + FDASSERT(casted_backend->InitFromPaddle( + runtime_option.model_file, runtime_option.params_file, + runtime_option.model_from_memory_, clone_option), + "Clone model from Paddle failed while initialize PaddleBackend."); + FDWARNING << "The target device id:" << device_id + << " is different from current device id:" << option_.device_id + << ", cannot share memory with current engine." << std::endl; + return new_backend; + } + casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end()); + casted_backend->outputs_desc_.assign(outputs_desc_.begin(), + outputs_desc_.end()); + casted_backend->predictor_ = std::move(predictor_->Clone(stream)); + return new_backend; +} + +void PaddleBackend::SetTRTDynamicShapeToConfig( + const PaddleBackendOption &option) { + std::map> max_shape; + std::map> min_shape; + std::map> opt_shape; + GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape); + if (min_shape.size() > 0) { + FDINFO << "Start setting trt dynamic shape." << std::endl; + config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape); + FDINFO << "Finish setting trt dynamic shape." 
<< std::endl; + } +} + +void PaddleBackend::GetDynamicShapeFromOption( + const PaddleBackendOption &option, + std::map> *max_shape, + std::map> *min_shape, + std::map> *opt_shape) const { + auto print_shape = [](const std::vector &shape) -> std::string { + std::ostringstream oss; + oss << "["; + for (int i = 0; i < shape.size(); ++i) { + oss << shape[i]; + if (i < shape.size() - 1) { + oss << ", "; + } + } + oss << "]"; + return oss.str(); + }; + for (const auto &item : option.trt_option.min_shape) { + auto max_iter = option.trt_option.max_shape.find(item.first); + auto opt_iter = option.trt_option.opt_shape.find(item.first); + FDASSERT(max_iter != option.trt_option.max_shape.end(), + "Cannot find %s in TrtBackendOption::min_shape.", + item.first.c_str()); + FDASSERT(opt_iter != option.trt_option.opt_shape.end(), + "Cannot find %s in TrtBackendOption::opt_shape.", + item.first.c_str()); + (*max_shape)[item.first].assign(max_iter->second.begin(), + max_iter->second.end()); + (*opt_shape)[item.first].assign(opt_iter->second.begin(), + opt_iter->second.end()); + (*min_shape)[item.first].assign(item.second.begin(), item.second.end()); + FDINFO << item.first + << ": the max shape = " << print_shape(max_iter->second) + << ", the min shape = " << print_shape(item.second) + << ", the opt shape = " << print_shape(opt_iter->second) + << std::endl; + } +} + +void PaddleBackend::GetInputDataFromOption( + const PaddleBackendOption &option, + std::map> *max_input_data, + std::map> *min_input_data, + std::map> *opt_input_data) const { + for (const auto &item : option.trt_option.min_input_data) { + auto max_iter = option.trt_option.max_input_data.find(item.first); + auto opt_iter = option.trt_option.opt_input_data.find(item.first); + FDASSERT(max_iter != option.trt_option.max_input_data.end(), + "Cannot find %s in TrtBackendOption::min_input_data.", + item.first.c_str()); + FDASSERT(opt_iter != option.trt_option.opt_input_data.end(), + "Cannot find %s in TrtBackendOption::opt_input_data.", + item.first.c_str()); + (*max_input_data)[item.first].assign(max_iter->second.begin(), + max_iter->second.end()); + (*opt_input_data)[item.first].assign(opt_iter->second.begin(), + opt_iter->second.end()); + (*min_input_data)[item.first].assign(item.second.begin(), + item.second.end()); + } +} + +void PaddleBackend::CollectShapeRun( + paddle_infer::Predictor *predictor, + const std::map> &shape, + const std::map> &data) const { + auto input_names = predictor->GetInputNames(); + auto input_type = predictor->GetInputTypes(); + for (const auto &name : input_names) { + FDASSERT(shape.find(name) != shape.end() && + input_type.find(name) != input_type.end(), + "When collect_trt_shape is true, please define max/opt/min shape " + "for model's input:[\"%s\"] by " + "(C++)RuntimeOption.trt_option.SetShape/" + "(Python)RuntimeOption.trt_option.set_shape.", + name.c_str()); + auto tensor = predictor->GetInputHandle(name); + auto shape_value = shape.at(name); + int shape_num = std::accumulate(shape_value.begin(), shape_value.end(), 1, + std::multiplies()); + tensor->Reshape(shape_value); + + if (data.find(name) != data.end()) { + FDASSERT(data.at(name).size() == shape_num, + "The data num and accumulate of shape must be equal for input: " + "[\"%s\"], " + " When Use the (C++)RuntimeOption.trt_option.SetInputData/ " + " (Python)RuntimeOption.trt_option.set_input_data/", + name.c_str()); + } + + auto dtype = input_type[name]; + switch (dtype) { + case paddle_infer::DataType::FLOAT32: { + if (data.find(name) != data.end()) { + 
tensor->CopyFromCpu(data.at(name).data()); + } else { + std::vector input_data(shape_num, 1.0); + tensor->CopyFromCpu(input_data.data()); + } + break; + } + case paddle_infer::DataType::INT32: { + if (data.find(name) != data.end()) { + std::vector input_data(data.at(name).begin(), data.at(name).end()); + tensor->CopyFromCpu(input_data.data()); + } else { + std::vector input_data(shape_num, 1); + tensor->CopyFromCpu(input_data.data()); + } + break; + } + case paddle_infer::DataType::INT64: { + if (data.find(name) != data.end()) { + std::vector input_data(data.at(name).begin(), + data.at(name).end()); + tensor->CopyFromCpu(input_data.data()); + } else { + std::vector input_data(shape_num, 1); + tensor->CopyFromCpu(input_data.data()); + } + break; + } + default: { + FDASSERT(false, "Input data Paddle backend only supports " + "FP32/INT32/INT64 currently."); + break; + } + } + } + predictor->Run(); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.h new file mode 100755 index 0000000000..874b8b7f2b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/paddle_backend.h @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include + +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/paddle/option.h" +#ifdef ENABLE_PADDLE2ONNX +#include "paddle2onnx/converter.h" +#endif +#include "paddle/include/paddle_inference_api.h" // NOLINT +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { + +// convert FD device to paddle place type +paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device); + +// Share memory buffer with paddle_infer::Tensor from ultrainfer::FDTensor +void ShareTensorFromFDTensor(paddle_infer::Tensor *tensor, FDTensor &fd_tensor); + +void ShareOutTensorFromFDTensor(paddle_infer::Tensor *tensor, + FDTensor &fd_tensor); + +// convert paddle_infer::Tensor to ultrainfer::FDTensor +// if copy_to_fd is true, copy memory data to FDTensor +/// else share memory to FDTensor +void PaddleTensorToFDTensor(std::unique_ptr &tensor, + FDTensor *fd_tensor, bool copy_to_fd); + +// Convert data type from paddle inference to ultrainfer +FDDataType PaddleDataTypeToFD(const paddle_infer::DataType &dtype); + +// Convert data type from paddle2onnx::PaddleReader to ultrainfer +FDDataType ReaderDataTypeToFD(int32_t dtype); + +class PaddleBackend : public BaseBackend { +public: + PaddleBackend() {} + virtual ~PaddleBackend() = default; + bool Init(const RuntimeOption &option); + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + + int NumInputs() const override { return inputs_desc_.size(); } + + int NumOutputs() const override { return outputs_desc_.size(); } + + std::unique_ptr Clone(RuntimeOption &runtime_option, + void *stream = nullptr, + int device_id = -1) override; + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + +private: + void BuildOption(const PaddleBackendOption &option); + + bool + InitFromPaddle(const std::string &model, const std::string ¶ms, + bool model_from_memory, + const PaddleBackendOption &option = PaddleBackendOption()); + + void + CollectShapeRun(paddle_infer::Predictor *predictor, + const std::map> &shape, + const std::map> &data) const; + void GetDynamicShapeFromOption( + const PaddleBackendOption &option, + std::map> *max_shape, + std::map> *min_shape, + std::map> *opt_shape) const; + void GetInputDataFromOption( + const PaddleBackendOption &option, + std::map> *max_input_data, + std::map> *min_input_data, + std::map> *opt_input_data) const; + void SetTRTDynamicShapeToConfig(const PaddleBackendOption &option); + PaddleBackendOption option_; + paddle_infer::Config config_; + std::shared_ptr predictor_; + std::vector inputs_desc_; + std::vector outputs_desc_; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/paddle/util.cc b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/util.cc new file mode 100755 index 0000000000..f5e1e01741 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/paddle/util.cc @@ -0,0 +1,236 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/core/float16.h" +#include "ultrainfer/runtime/backends/paddle/paddle_backend.h" + +namespace ultrainfer { +paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device) { + if (device == Device::GPU) { + return paddle_infer::PlaceType::kGPU; + } else if (device == Device::KUNLUNXIN) { + return paddle_infer::PlaceType::kXPU; + } + return paddle_infer::PlaceType::kCPU; +} + +void ShareTensorFromFDTensor(paddle_infer::Tensor *tensor, + FDTensor &fd_tensor) { + std::vector shape(fd_tensor.shape.begin(), fd_tensor.shape.end()); + tensor->Reshape(shape); + auto place = ConvertFDDeviceToPlace(fd_tensor.device); + if (fd_tensor.dtype == FDDataType::FP32) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT32) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT64) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT8) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } else if (fd_tensor.dtype == FDDataType::UINT8) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } + FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.", + Str(fd_tensor.dtype).c_str()); +} + +void ShareOutTensorFromFDTensor(paddle_infer::Tensor *tensor, + FDTensor &fd_tensor) { + std::vector shape(fd_tensor.shape.begin(), fd_tensor.shape.end()); + auto place = ConvertFDDeviceToPlace(fd_tensor.device); + if (fd_tensor.dtype == FDDataType::FP32) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.MutableData()), + shape, place); + } else { + tensor->CopyToCpu(static_cast(fd_tensor.MutableData())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT32) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.MutableData()), + shape, place); + } else { + tensor->CopyToCpu(static_cast(fd_tensor.MutableData())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT64) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.MutableData()), + shape, place); + } else { + tensor->CopyToCpu(static_cast(fd_tensor.MutableData())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT8) { + if (place == 
paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } else if (fd_tensor.dtype == FDDataType::UINT8) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } + return; + } + FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.", + Str(fd_tensor.dtype).c_str()); +} + +void PaddleTensorToFDTensor(std::unique_ptr &tensor, + FDTensor *fd_tensor, bool copy_to_fd) { + auto fd_dtype = PaddleDataTypeToFD(tensor->type()); + std::vector shape; + auto tmp_shape = tensor->shape(); + shape.assign(tmp_shape.begin(), tmp_shape.end()); + if (copy_to_fd) { + fd_tensor->Resize(shape, fd_dtype, tensor->name()); + if (fd_tensor->dtype == FDDataType::FP32) { + tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); + return; + } else if (fd_tensor->dtype == FDDataType::INT32) { + tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); + return; + } else if (fd_tensor->dtype == FDDataType::INT64) { + tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); + return; + } else if (fd_tensor->dtype == FDDataType::INT8) { + tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); + return; + } else if (fd_tensor->dtype == FDDataType::UINT8) { + tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); + return; + } + FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.", + Str(fd_tensor->dtype).c_str()); + } else { + paddle_infer::PlaceType place; + int size = 0; + // TODO(liqi): The tensor->data interface of paddle don't return device id + // and don't support return void*. 
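// Note on the zero-copy branch below (copy_to_fd == false): the FDTensor only
// wraps the buffer owned by the paddle_infer::Tensor via SetExternalData, so
// the pointer is generally valid only until the next predictor Run(); callers
// should consume or copy the data before running inference again.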
+ void *out_data = nullptr; + if (fd_dtype == FDDataType::FP32) { + out_data = tensor->data(&place, &size); + } else if (fd_dtype == FDDataType::INT32) { + out_data = tensor->data(&place, &size); + } else if (fd_dtype == FDDataType::INT64) { + out_data = tensor->data(&place, &size); + } else if (fd_dtype == FDDataType::INT8) { + out_data = tensor->data(&place, &size); + } else if (fd_dtype == FDDataType::UINT8) { + out_data = tensor->data(&place, &size); + } else { + FDASSERT( + false, + "Unexpected data type(%s) while infer shared with PaddleBackend.", + Str(fd_dtype).c_str()); + } + Device device = Device::CPU; + if (place == paddle_infer::PlaceType::kGPU) { + device = Device::GPU; + } else if (place == paddle_infer::PlaceType::kXPU) { + device = Device::KUNLUNXIN; + FDASSERT(false, "Currently, copy_to_fd=false, FDTensor SetExternalData " + "is not support for Device::KUNLUNXIN now!") + } + fd_tensor->name = tensor->name(); + fd_tensor->SetExternalData(shape, fd_dtype, out_data, device); + } +} + +FDDataType PaddleDataTypeToFD(const paddle_infer::DataType &dtype) { + auto fd_dtype = FDDataType::FP32; + if (dtype == paddle_infer::FLOAT32) { + fd_dtype = FDDataType::FP32; + } else if (dtype == paddle_infer::INT64) { + fd_dtype = FDDataType::INT64; + } else if (dtype == paddle_infer::INT32) { + fd_dtype = FDDataType::INT32; + } else if (dtype == paddle_infer::UINT8) { + fd_dtype = FDDataType::UINT8; + } else if (dtype == paddle_infer::INT8) { + fd_dtype = FDDataType::INT8; + } else if (dtype == paddle_infer::FLOAT16) { + fd_dtype = FDDataType::FP16; + } else { + FDASSERT( + false, + "Unexpected data type: %d while call CopyTensorToCpu in PaddleBackend.", + int(dtype)); + } + return fd_dtype; +} + +FDDataType ReaderDataTypeToFD(int32_t dtype) { + auto fd_dtype = FDDataType::FP32; + if (dtype == 0) { + fd_dtype = FDDataType::FP32; + } else if (dtype == 1) { + fd_dtype = FDDataType::FP64; + } else if (dtype == 2) { + fd_dtype = FDDataType::UINT8; + } else if (dtype == 3) { + fd_dtype = FDDataType::INT8; + } else if (dtype == 4) { + fd_dtype = FDDataType::INT32; + } else if (dtype == 5) { + fd_dtype = FDDataType::INT64; + } else if (dtype == 6) { + fd_dtype = FDDataType::FP16; + } else { + FDASSERT(false, + "Unexpected data type: %d while call ReaderDataTypeToFD in " + "PaddleBackend.", + dtype); + } + return fd_dtype; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/compile.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/compile.h new file mode 100755 index 0000000000..03d86c3cdf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/compile.h @@ -0,0 +1,170 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include + +#include "iengine.h" // NOLINT +#include "poros_module.h" // NOLINT +#include "torch/script.h" // NOLINT + +namespace baidu { +namespace mirana { +namespace poros { + +/** + * @brief compile graph + * + * @param [in] module : original module + * @param [in] input_ivalues : prewarm datas + * @param [in] options : Inference options + * @return porosmodule + * @retval !nullptr => succeed nullptr => failed + **/ +std::unique_ptr +Compile(const torch::jit::Module &module, + const std::vector> &prewarm_datas, + const PorosOptions &options); + +class Compiler { +public: + typedef std::unordered_map engine_map_t; + typedef std::vector> ivalue_vec_t; + + Compiler() : _origin_module(NULL) {} + ~Compiler(); + + /** + * @brief initial Compiler + * + * @param [in] options : poros options + * @return int + * @retval 0 => succeed <0 => failed + **/ + int init(const PorosOptions &options); + + /** + * @brief compile whole graph + * + * @param [in] origin_module + * @param [in] prewarm_datas : ivalue_vec_t, vector of IValue + * @param [out] optimized_module : optimized graph + * @return int + * @retval 0 => succeed <0 => failed + **/ + int compile(const torch::jit::Module &origin_module, + const ivalue_vec_t &prewarm_datas, + torch::jit::Module *optimized_module); + +private: + /** + * @brief preprocess this calculation graph + * + * @param [in] prewarm_datas : ivalue_vec_t, vector of IValue + * @param [out] graph : preprcessed graph + * @return int + * @retval 0 => succeed <0 => failed + **/ + int preprocess_graph(const ivalue_vec_t &prewarm_datas, + std::shared_ptr &graph); + + /** + * @brief segement this calculation graph + * + * @param [in/out] graph + * @return int + * @retval 0 => succeed <0 => failed + **/ + int segment_graph(std::shared_ptr &graph); + + // Split subgraph(block) + // The divided subgraph, as a subgraph, is associated with the block + int segment_block(torch::jit::Block &block, IEngine *engine, + int current_depth); + + // Subgraph optimization + /** + * @brief Subgraph optimization + * + * @param [in] prewarm_datas : ivalue_vec_t, vector of IValue + * @param [in] opt_graph : ivalue_vec_t, vector of IValue + * @param [out] optimized_module : optimized graph + * @return int + * @retval 0 => succeed <0 => failed + **/ + int optimize_subgraph(const ivalue_vec_t &prewarm_datas, + const std::shared_ptr &opt_graph, + torch::jit::Module *optimized_module); + + // Subgraph optimization(block) + int optimize_subblock(torch::jit::Block *block, + torch::jit::Module *optimized_module); + + /** + * @brief Compile the subgraph into a new graph based on the engine + * + * @param [in] engine : The engine used by the subgraph + * @param [in] subgraph_node : Subgraph node + * @return [out] module : Transformed model + * @retval 0 => succeed <0 => failed + **/ + int transform(IEngine *engine, torch::jit::Node &subgraph_node, + torch::jit::Module &module); + + /** + * @brief Select engine based on subgraph and options + * + * @param [in] node : Jit Node + * @return int + * @retval 0 => succeed <0 => failed + **/ + IEngine *select_engine(const torch::jit::Node *n); + + /** + * @brief destory + * + * @return void + **/ + void close(); + +private: + int _max_segment_depth{5}; // Maximum subgraph segmentation depth + ivalue_vec_t _prewarm_datas; // Prewarm datas + PorosOptions _options; + engine_map_t _engine_map; // The engine used to record the subgraph + const torch::jit::Module *_origin_module; // Origin_module + std::atomic _engine_index = 
{0}; // Record engine index
+};
+
+/**
+ * @brief Compile the graph (internal use)
+ *
+ * @param [in] module : Origin module
+ * @param [in] input_ivalues : Prewarm data
+ * @param [in] options : Inference options
+ * @return optimized_module
+ * @retval !nullptr => succeed  nullptr => failed
+ **/
+std::unique_ptr<PorosModule>
+CompileGraph(const torch::jit::Module &module,
+             const std::vector<std::vector<c10::IValue>> &prewarm_datas,
+             const PorosOptions &options);
+
+} // namespace poros
+} // namespace mirana
+} // namespace baidu
diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/iengine.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/iengine.h
new file mode 100755
index 0000000000..908ac4253b
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/iengine.h
@@ -0,0 +1,82 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+
+// from pytorch
+#include "ATen/core/interned_strings.h" // NOLINT
+#include "torch/csrc/jit/ir/ir.h"       // NOLINT
+#include "torch/script.h"               // NOLINT
+
+#include "plugin_create.h" // NOLINT
+
+namespace baidu {
+namespace mirana {
+namespace poros {
+
+struct PorosGraph {
+  torch::jit::Graph *graph = NULL;
+  torch::jit::Node *node = NULL;
+};
+
+typedef uint64_t EngineID;
+
+class IEngine : public IPlugin, public torch::CustomClassHolder {
+public:
+  virtual ~IEngine() {}
+
+  /**
+   * @brief Initialize the engine; the engine is usable only if init succeeds.
+   * @return int
+   * @retval 0 => success, <0 => fail
+   **/
+  virtual int init() = 0;
+
+  /**
+   * @brief During compilation, the subgraph is converted into the graph
+   * structure of the corresponding engine and stored inside the engine, so
+   * that excute_engine() can be called at runtime.
+   * @param [in] sub_graph : subgraph
+   * @return int
+   * @retval 0 => success, <0 => fail
+   **/
+  virtual int transform(const PorosGraph &sub_graph) = 0;
+
+  /**
+   * @brief Logic that runs the subgraph at execution time.
+   * @param [in] inputs : input tensors
+   * @return output tensors
+   **/
+  virtual std::vector<at::Tensor>
+  excute_engine(const std::vector<at::Tensor> &inputs) = 0;
+
+  virtual void register_module_attribute(const std::string &name,
+                                         torch::jit::Module &module) = 0;
+
+  // Name that identifies this engine
+  virtual const std::string who_am_i() = 0;
+
+  // Whether the node is supported by the current engine
+  bool is_node_supported(const torch::jit::Node *node);
+
+public:
+  std::pair<uint64_t, uint64_t> _num_io; // Number of input/output parameters
+  EngineID _id;
+};
+
+} // namespace poros
+} // namespace mirana
+} // namespace baidu
diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/plugin_create.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/plugin_create.h
new file mode 100755
index 0000000000..1b4cb3b62f
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/plugin_create.h
@@ -0,0 +1,69 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace baidu { +namespace mirana { +namespace poros { + +class IPlugin { +public: + virtual ~IPlugin() {} + virtual const std::string who_am_i() = 0; +}; + +typedef IPlugin *(*plugin_creator_t)(); +typedef std::unordered_map plugin_creator_map_t; + +IPlugin *create_plugin(const std::string &plugin_name); +IPlugin *create_plugin(const std::string &plugin_name, + const plugin_creator_map_t &plugin_creator_map); + +void create_all_plugins(const plugin_creator_map_t &plugin_creator_map, + std::unordered_map &plugin_m); +// void create_all_plugins(std::unordered_map& plugin_m); + +template IPlugin *default_plugin_creator() { + return new (std::nothrow) PluginType; +} + +void register_plugin_creator(const std::string &plugin_name, + plugin_creator_t creator); +void register_plugin_creator(const std::string &plugin_name, + plugin_creator_t creator, + plugin_creator_map_t &plugin_creator_map); + +template +void register_plugin_class(const std::string &plugin_name) { + return register_plugin_creator(plugin_name, + default_plugin_creator); +} + +// This version is recommended +template +void register_plugin_class(const std::string &plugin_name, + plugin_creator_map_t &plugin_creator_map) { + return register_plugin_creator( + plugin_name, default_plugin_creator, plugin_creator_map); +} + +} // namespace poros +} // namespace mirana +} // namespace baidu + +/* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/poros_module.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/poros_module.h new file mode 100755 index 0000000000..0d20ab26ac --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/common/poros_module.h @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
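// A minimal sketch (not part of this patch) of how the plugin_create.h helpers
// above are meant to be used: register a creator under a name, then build the
// plugin by that name. "DemoPlugin" and demo_plugin_registration() are
// hypothetical names used only for illustration.

#include <iostream>
#include <string>

#include "plugin_create.h" // NOLINT

namespace {
class DemoPlugin : public baidu::mirana::poros::IPlugin {
public:
  const std::string who_am_i() override { return "DemoPlugin"; }
};
} // namespace

int demo_plugin_registration() {
  baidu::mirana::poros::plugin_creator_map_t creators;
  // Map the name "DemoPlugin" to default_plugin_creator<DemoPlugin>.
  baidu::mirana::poros::register_plugin_class<DemoPlugin>("DemoPlugin",
                                                          creators);
  // Look the creator up by name and instantiate the plugin.
  baidu::mirana::poros::IPlugin *plugin =
      baidu::mirana::poros::create_plugin("DemoPlugin", creators);
  std::cout << plugin->who_am_i() << std::endl; // prints "DemoPlugin"
  delete plugin;
  return 0;
}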
+ +#pragma once + +#include "torch/csrc/jit/jit_log.h" // NOLINT +#include "torch/script.h" // NOLINT +#include +// #include "ATen/Context.h" + +namespace baidu { +namespace mirana { +namespace poros { + +enum Device : int8_t { GPU = 0, CPU, XPU, UNKNOW }; + +struct PorosOptions { + Device device = GPU; + bool debug = false; + bool use_fp16 = false; + bool is_dynamic = false; + bool long_to_int = true; + uint64_t max_workspace_size = 1ULL << 30; + int32_t device_id = -1; + int32_t unconst_ops_thres = -1; + bool use_nvidia_tf32 = false; +}; + +class PorosModule : public torch::jit::Module { +public: + PorosModule(torch::jit::Module module) + : torch::jit::Module(module) {} // NOLINT + ~PorosModule() = default; + + void to_device(Device device) { _options.device = device; } + + // c10::IValue forward(std::vector inputs); + // void save(const std::string& filename); +public: + PorosOptions _options; +}; + +// via porosmodule.save +std::unique_ptr Load(const std::string &filename, + const PorosOptions &options); + +} // namespace poros +} // namespace mirana +} // namespace baidu diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/option.h new file mode 100755 index 0000000000..9e65db1089 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/option.h @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_type.h" +#include +#include +#include +#include + +namespace ultrainfer { + +/*! @brief Option object to configure Poros backend + */ +struct PorosBackendOption { + Device device = Device::CPU; + int device_id = 0; + bool long_to_int = true; + // There is calculation precision in tf32 mode on A10, it can bring some + // performance improvement, but there may be diff + bool use_nvidia_tf32 = false; + // Threshold for the number of non-const ops + int32_t unconst_ops_thres = -1; + std::string poros_file = ""; + std::vector prewarm_datatypes = {FDDataType::FP32}; + // TRT options + bool enable_fp16 = false; + bool enable_int8 = false; + bool is_dynamic = false; + size_t max_batch_size = 32; + size_t max_workspace_size = 1 << 30; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/poros/option_pybind.cc new file mode 100755 index 0000000000..86b257c3ba --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/option_pybind.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/pybind/main.h"
+#include "ultrainfer/runtime/backends/poros/option.h"
+
+namespace ultrainfer {
+
+void BindPorosOption(pybind11::module &m) {
+  pybind11::class_<PorosBackendOption>(m, "PorosBackendOption")
+      .def(pybind11::init())
+      .def_readwrite("long_to_int", &PorosBackendOption::long_to_int)
+      .def_readwrite("use_nvidia_tf32", &PorosBackendOption::use_nvidia_tf32)
+      .def_readwrite("unconst_ops_thres",
+                     &PorosBackendOption::unconst_ops_thres)
+      .def_readwrite("prewarm_datatypes",
+                     &PorosBackendOption::prewarm_datatypes)
+      .def_readwrite("enable_fp16", &PorosBackendOption::enable_fp16)
+      .def_readwrite("enable_int8", &PorosBackendOption::enable_int8)
+      .def_readwrite("is_dynamic", &PorosBackendOption::is_dynamic)
+      .def_readwrite("max_batch_size", &PorosBackendOption::max_batch_size)
+      .def_readwrite("max_workspace_size",
+                     &PorosBackendOption::max_workspace_size);
+}
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.cc
new file mode 100755
index 0000000000..d03b2b85be
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.cc
@@ -0,0 +1,175 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/runtime/backends/poros/poros_backend.h"
+
+#include
+
+namespace ultrainfer {
+
+TensorInfo PorosBackend::GetInputInfo(int index) {
+  // eager mode can't obtain input information before inference
+  TensorInfo info_input;
+  return info_input;
+}
+
+TensorInfo PorosBackend::GetOutputInfo(int index) {
+  // eager mode can't obtain output information before inference
+  TensorInfo info_output;
+  return info_output;
+}
+
+std::vector<TensorInfo> PorosBackend::GetInputInfos() {
+  // eager mode can't obtain input information before inference
+  std::vector<TensorInfo> info_inputs;
+  return info_inputs;
+}
+
+std::vector<TensorInfo> PorosBackend::GetOutputInfos() {
+  // eager mode can't obtain output information before inference
+  std::vector<TensorInfo> info_outputs;
+  return info_outputs;
+}
+
+void PorosBackend::BuildOption(const PorosBackendOption &option) {
+  _options.device = (option.device == Device::GPU)
+                        ?
baidu::mirana::poros::Device::GPU + : baidu::mirana::poros::Device::CPU; + _options.long_to_int = option.long_to_int; + _options.use_nvidia_tf32 = option.use_nvidia_tf32; + _options.device_id = option.device_id; + _options.unconst_ops_thres = option.unconst_ops_thres; + _options.is_dynamic = option.is_dynamic; + _options.max_workspace_size = option.max_workspace_size; + _options.use_fp16 = option.enable_fp16; + return; +} + +bool PorosBackend::Compile(const std::string &model_file, + std::vector> &prewarm_tensors, + const PorosBackendOption &option) { + if (initialized_) { + FDERROR << "PorosBackend is already initlized, cannot initialize again." + << std::endl; + return false; + } + BuildOption(option); + torch::jit::Module mod; + mod = torch::jit::load(model_file); + mod.eval(); + if (option.device == Device::GPU) { + mod.to(at::kCUDA); + } else { + mod.to(at::kCPU); + } + // get inputs_nums and outputs_nums + auto graph = mod.get_method("forward").graph(); + auto inputs = graph->inputs(); + // remove self node + _numinputs = inputs.size() - 1; + // FDTensor to at::Tensor + std::vector> prewarm_datas; + bool is_backend_cuda = (option.device == Device::GPU); + for (size_t i = 0; i < prewarm_tensors.size(); ++i) { + std::vector prewarm_data; + for (size_t j = 0; j < prewarm_tensors[i].size(); ++j) { + auto tensor = CreatePorosValue(prewarm_tensors[i][j], is_backend_cuda); + prewarm_data.push_back(tensor); + } + prewarm_datas.push_back(prewarm_data); + } + // get outputs nums + auto temp_result = mod.forward(prewarm_datas[0]); + size_t outputs_nums = 0; + if (temp_result.isTensor()) { + outputs_nums += 1; + } else if (temp_result.isTuple()) { + auto temp_result_tuple = temp_result.toTuple(); + for (size_t i = 0; i < temp_result_tuple->elements().size(); ++i) { + auto poros_tensor = temp_result_tuple->elements()[i]; + if (poros_tensor.isTensor()) { + outputs_nums += 1; + } else if (poros_tensor.isList()) { + auto poros_tensor_list = poros_tensor.toList(); + outputs_nums += poros_tensor_list.size(); + } else if (poros_tensor.isTuple()) { + auto poros_tensor_tuple = poros_tensor.toTuple(); + outputs_nums += poros_tensor_tuple->elements().size(); + } else { + continue; + } + } + } + _numoutputs = outputs_nums; + _poros_module = baidu::mirana::poros::Compile(mod, prewarm_datas, _options); + if (_poros_module == nullptr) { + FDERROR << "PorosBackend initlize Failed, try initialize again." + << std::endl; + return false; + } + initialized_ = true; + return true; +} + +bool PorosBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + // Convert FD Tensor to PyTorch Tensor + std::vector poros_inputs; + bool is_backend_cuda = + _options.device == baidu::mirana::poros::Device::GPU ? 
true : false; + for (size_t i = 0; i < inputs.size(); ++i) { + poros_inputs.push_back(CreatePorosValue(inputs[i], is_backend_cuda)); + } + // Infer + auto poros_outputs = _poros_module->forward(poros_inputs); + // Convert PyTorch Tensor to FD Tensor + if (poros_outputs.isTensor()) { + CopyTensorToCpu(poros_outputs.toTensor(), &((*outputs)[0]), + is_backend_cuda); + } else if (poros_outputs.isTuple()) { + // deal with multi outputs + auto poros_outputs_tuple = poros_outputs.toTuple(); + size_t index = 0; + for (size_t i = 0; i < poros_outputs_tuple->elements().size(); ++i) { + auto poros_tensor = poros_outputs_tuple->elements()[i]; + if (poros_tensor.isTensor()) { + CopyTensorToCpu(poros_tensor.toTensor(), &((*outputs)[index]), + is_backend_cuda); + index += 1; + } else if (poros_tensor.isList()) { + auto poros_tensor_list = poros_tensor.toList(); + for (const auto list_idx : c10::irange(0, poros_tensor_list.size())) { + const auto &elt = poros_tensor_list.get(list_idx); + CopyTensorToCpu(elt.toTensor(), &((*outputs)[index]), + is_backend_cuda); + index += 1; + } + } else if (poros_tensor.isTuple()) { + auto poros_tensor_tuple = poros_tensor.toTuple(); + for (size_t j = 0; j < poros_tensor_tuple->elements().size(); ++j) { + CopyTensorToCpu(poros_tensor_tuple->elements()[j].toTensor(), + &((*outputs)[index]), is_backend_cuda); + index += 1; + } + } else { + continue; + } + } + } else { + FDERROR << "Convert to FDTensor Failed!!!!!" << std::endl; + } + return true; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.h new file mode 100755 index 0000000000..89dd88e889 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/poros_backend.h @@ -0,0 +1,91 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
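// A hedged usage sketch for the backend declared below (not part of this
// patch). Assumptions: a TorchScript model saved at "model.pt" and a single
// FP32 input of shape [1, 3, 224, 224]; real prewarm shapes and dtypes depend
// on the model being deployed.

#include <string>
#include <vector>

#include "ultrainfer/runtime/backends/poros/poros_backend.h"

bool demo_poros_backend() {
  ultrainfer::PorosBackendOption option;
  option.device = ultrainfer::Device::GPU;

  // Poros compiles the TorchScript graph against one or more groups of
  // prewarm tensors that carry the expected input shapes/dtypes.
  ultrainfer::FDTensor prewarm;
  prewarm.Resize({1, 3, 224, 224}, ultrainfer::FDDataType::FP32);
  std::vector<std::vector<ultrainfer::FDTensor>> prewarm_groups{{prewarm}};

  ultrainfer::PorosBackend backend;
  if (!backend.Compile("model.pt", prewarm_groups, option)) {
    return false;
  }

  // Infer() writes into a pre-sized output vector (it does not resize it).
  std::vector<ultrainfer::FDTensor> inputs{prewarm};
  std::vector<ultrainfer::FDTensor> outputs(backend.NumOutputs());
  return backend.Infer(inputs, &outputs);
}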
+ +#pragma once + +#include +#include +#include +#include + +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/poros/common/compile.h" +#include "ultrainfer/runtime/backends/poros/common/poros_module.h" +#include "ultrainfer/runtime/backends/poros/option.h" + +namespace ultrainfer { + +// Convert data type from ultrainfer to poros +at::ScalarType GetPorosDtype(const FDDataType &fd_dtype); + +// Convert data type from poros to ultrainfer +FDDataType GetFdDtype(const at::ScalarType &dtype); + +// at::ScalarType to std::string for FDERROR +std::string AtType2String(const at::ScalarType &dtype); + +// Create at::Tensor +// is_backend_cuda specify if Poros use GPU Device +// While is_backend_cuda = true, and tensor.device = Device::GPU +at::Tensor CreatePorosValue(FDTensor &tensor, bool is_backend_cuda = false); + +// Copy memory data from at::Tensor to ultrainfer::FDTensor +void CopyTensorToCpu(const at::Tensor &tensor, FDTensor *fd_tensor, + bool is_backend_cuda = false); + +class PorosBackend : public BaseBackend { +public: + PorosBackend() {} + virtual ~PorosBackend() = default; + + void BuildOption(const PorosBackendOption &option); + + bool Init(const RuntimeOption &option) { + if (!(Supported(option.model_format, Backend::POROS) && + Supported(option.device, Backend::POROS))) { + return false; + } + if (option.model_from_memory_) { + FDERROR << "Poros backend doesn't support load model " + << "from memory, please load model from disk." << std::endl; + return false; + } + return true; + } + + bool Compile(const std::string &model_file, + std::vector> &prewarm_tensors, + const PorosBackendOption &option = PorosBackendOption()); + + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + + int NumInputs() const { return _numinputs; } + + int NumOutputs() const { return _numoutputs; } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + +private: + baidu::mirana::poros::PorosOptions _options; + std::unique_ptr _poros_module; + std::vector> _prewarm_datas; + int _numinputs = 1; + int _numoutputs = 1; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/poros/utils.cc b/libs/ultrainfer/ultrainfer/runtime/backends/poros/utils.cc new file mode 100755 index 0000000000..e3b11b743b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/poros/utils.cc @@ -0,0 +1,185 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/runtime/backends/poros/poros_backend.h" + +#ifdef WITH_GPU +#include +#endif + +namespace ultrainfer { + +std::string AtType2String(const at::ScalarType &dtype) { + std::string out; + switch (dtype) { + case at::kByte: + out = "at::kByte"; + break; + case at::kChar: + out = "at::kChar"; + break; + case at::kShort: + out = "at::kShort"; + break; + case at::kInt: + out = "at::kInt"; + break; + case at::kLong: + out = "at::kLong"; + break; + case at::kHalf: + out = "at::kHalf"; + break; + case at::kFloat: + out = "at::kFloat"; + break; + case at::kDouble: + out = "at::kDouble"; + break; + default: + out = "at::UNKNOWN"; + } + return out; +} + +at::ScalarType GetPorosDtype(const FDDataType &fd_dtype) { + if (fd_dtype == FDDataType::FP32) { + return at::kFloat; + } else if (fd_dtype == FDDataType::FP64) { + return at::kDouble; + } else if (fd_dtype == FDDataType::INT32) { + return at::kInt; + } else if (fd_dtype == FDDataType::INT64) { + return at::kLong; + } + FDERROR << "Unrecognized fastdeply data type:" << Str(fd_dtype) << "." + << std::endl; + return at::kFloat; +} + +FDDataType GetFdDtype(const at::ScalarType &poros_dtype) { + if (poros_dtype == at::kFloat) { + return FDDataType::FP32; + } else if (poros_dtype == at::kDouble) { + return FDDataType::FP64; + } else if (poros_dtype == at::kInt) { + return FDDataType::INT32; + } else if (poros_dtype == at::kLong) { + return FDDataType::INT64; + } + FDERROR << "Unrecognized poros data type:" << AtType2String(poros_dtype) + << "." << std::endl; + return FDDataType::FP32; +} + +at::Tensor CreatePorosValue(FDTensor &tensor, bool is_backend_cuda) { + FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU, + "Only support tensor which device is CPU or GPU for PorosBackend."); + auto data_type = GetPorosDtype(tensor.dtype); + size_t numel = tensor.Numel(); + at::Tensor poros_value; + if (is_backend_cuda) { + poros_value = std::move( + at::empty(tensor.shape, {at::kCUDA}).to(data_type).contiguous()); + } else { + poros_value = std::move( + at::empty(tensor.shape, {at::kCPU}).to(data_type).contiguous()); + } + if (data_type == at::kFloat) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(float), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(float)); + } + } else if (data_type == at::kInt) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int32_t), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int32_t)); + } + } else if (data_type == at::kLong) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int64_t), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int64_t)); + } + } else if (data_type == at::kDouble) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(double), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(double)); + } + } else { + FDASSERT(false, "Unrecognized data type while calling " + "PorosBackend::CreatePorosValue()."); + } + return poros_value; +} + +void CopyTensorToCpu(const at::Tensor &tensor, FDTensor *fd_tensor, + bool is_backend_cuda) { + const auto data_type = tensor.scalar_type(); + std::vector shape; 
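// The at::Tensor shape is copied into an FDTensor of matching dtype below;
// only FP32/FP64/INT32/INT64 payloads are handled by this helper.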
+ auto sizes = tensor.sizes(); + for (size_t i = 0; i < sizes.size(); i++) { + shape.push_back(sizes[i]); + } + auto fd_dtype = GetFdDtype(data_type); + fd_tensor->Resize(shape, fd_dtype); + size_t numel = tensor.numel(); + // at::Tensor -> FDTensor + if (data_type == at::kFloat) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float)); + } + return; + } else if (data_type == at::kInt) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t)); + } + return; + } else if (data_type == at::kLong) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t)); + } + return; + } else if (data_type == at::kDouble) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double)); + } + return; + } +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/option.h new file mode 100755 index 0000000000..dfe40f3254 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/option.h @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +namespace ultrainfer { +namespace rknpu2 { +typedef enum _rknpu2_cpu_name { + RK356X = 0, /* run on RK356X. */ + RK3588 = 1, /* default,run on RK3588. */ + UNDEFINED, +} CpuName; + +/* The specification of NPU core setting.It has the following choices : + * RKNN_NPU_CORE_AUTO : Referring to automatic mode, meaning that it will + * select the idle core inside the NPU. + * RKNN_NPU_CORE_0 : Running on the NPU0 core. + * RKNN_NPU_CORE_1: Runing on the NPU1 core. + * RKNN_NPU_CORE_2: Runing on the NPU2 core. + * RKNN_NPU_CORE_0_1: Running on both NPU0 and NPU1 core simultaneously. + * RKNN_NPU_CORE_0_1_2: Running on both NPU0, NPU1 and NPU2 simultaneously. 
+ */ +typedef enum _rknpu2_core_mask { + RKNN_NPU_CORE_AUTO = 0, + RKNN_NPU_CORE_0 = 1, + RKNN_NPU_CORE_1 = 2, + RKNN_NPU_CORE_2 = 4, + RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 | RKNN_NPU_CORE_1, + RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 | RKNN_NPU_CORE_2, + RKNN_NPU_CORE_UNDEFINED, +} CoreMask; +} // namespace rknpu2 + +struct RKNPU2BackendOption { + rknpu2::CpuName cpu_name = rknpu2::CpuName::RK3588; + rknpu2::CoreMask core_mask = rknpu2::CoreMask::RKNN_NPU_CORE_AUTO; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.cc new file mode 100755 index 0000000000..f0a50e3596 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.cc @@ -0,0 +1,593 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h" +namespace ultrainfer { +RKNPU2Backend::~RKNPU2Backend() { + if (tensor_attrs_init_) { + if (input_attrs_ != nullptr) { + free(input_attrs_); + } + + if (output_attrs_ != nullptr) { + free(output_attrs_); + } + } + + if (tensor_memory_init_) { + for (uint32_t i = 0; i < io_num_.n_input; i++) { + rknn_destroy_mem(ctx_, input_mems_[i]); + } + + for (uint32_t i = 0; i < io_num_.n_output; i++) { + rknn_destroy_mem(ctx_, output_mems_[i]); + } + } +} + +/* + * @name RuntimeOptionIsApplicable + * @brief This function is used to determine whether the RuntimeOption + * meets the operating conditions of RKNPU2. + * @param None + * @return bool + * @note None + */ +bool RKNPU2Backend::RuntimeOptionIsApplicable( + const RuntimeOption &runtime_option) { + if (!Supported(runtime_option.model_format, Backend::RKNPU2)) { + FDERROR << "The model format is not supported for RKNPU2." << std::endl; + return false; + } + + if (!Supported(runtime_option.device, Backend::RKNPU2)) { + FDERROR << "The device is not supported for RKNPU2." << std::endl; + return false; + } + + if (runtime_option.model_from_memory_) { + FDERROR << "RKNPU2 backend doesn't support load model from memory, please " + "load model from disk." + << std::endl; + return false; + } + return true; +} + +/* + * @name GetSDKAndDeviceVersion + * @brief Get RKNPU2 sdk and device version. + * @param None + * @return bool + * @note The private variable ctx_ must be initialized. + */ +bool RKNPU2Backend::GetSDKAndDeviceVersion() { + int ret; + ret = rknn_query(ctx_, RKNN_QUERY_SDK_VERSION, &sdk_ver_, sizeof(sdk_ver_)); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_query) failed! ret=" << ret << std::endl; + return false; + } + FDINFO << "rknpu2 runtime version: " << sdk_ver_.api_version << std::endl; + FDINFO << "rknpu2 driver version: " << sdk_ver_.drv_version << std::endl; + return true; +} + +/* + * @name BuildOption + * @brief Save option and set core mask. 
+ * @param RKNPU2BackendOption + * @note None + */ +void RKNPU2Backend::BuildOption(const RKNPU2BackendOption &option) { + option_ = option; + + // save cpu_name + option_.cpu_name = option.cpu_name; + + // save context + option_.core_mask = option.core_mask; + + // set core mask + if (option_.cpu_name == rknpu2::CpuName::RK3588) { + if (!SetCoreMask(option_.core_mask)) { + FDERROR << "set core mask failed" << std::endl; + } + } +} + +/*************************************************************** + * @name Init + * @brief Initialize RKNN model + * @param model_file: Binary data for the RKNN model or the path of RKNN + * @return bool + * @note None + ***************************************************************/ +bool RKNPU2Backend::Init(const RuntimeOption &runtime_option) { + if (!RuntimeOptionIsApplicable(runtime_option)) { + FDERROR << "Runtime option is not applicable." << std::endl; + return false; + } + + if (!LoadModel((char *)runtime_option.model_file.data())) { + FDERROR << "Load model failed" << std::endl; + return false; + } + + if (!InitInputAndOutputNumber()) { + FDERROR << "Init input and output number failed" << std::endl; + return false; + } + + if (!GetSDKAndDeviceVersion()) { + FDERROR << "Get SDK and device version failed" << std::endl; + return false; + } + + BuildOption(runtime_option.rknpu2_option); + + if (!InitInputAndOutputInformation()) { + FDERROR << "Get model input output information failed" << std::endl; + return false; + } + + return true; +} + +/* + * @name SetCoreMask + * @brief Set NPU core for model + * @param core_mask: The specification of NPU core setting. + * @return bool + * @note Only support RK3588 + */ +bool RKNPU2Backend::SetCoreMask(const rknpu2::CoreMask &core_mask) const { + if (option_.cpu_name != rknpu2::CpuName::RK3588) { + FDINFO << "SetCoreMask only support when soc is RK3588." << std::endl; + return false; + } + + int ret = rknn_set_core_mask(ctx_, static_cast(core_mask)); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_set_core_mask) failed! ret=" << ret + << std::endl; + return false; + } + return true; +} + +/* + * @name LoadModel + * @brief Read the model and initialize rknn context. + * @param model: Binary data for the RKNN model or the path of RKNN model. + * @return bool + * @note None + */ +bool RKNPU2Backend::LoadModel(void *model) { + int ret = RKNN_SUCC; + ret = rknn_init(&ctx_, model, 0, 0, nullptr); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_init) failed! ret=" << ret << std::endl; + return false; + } + return true; +} + +/* + * @name InitInputAndOutputNumber + * @brief Initialize io_num_. + * @param + * @return bool + * @note The private variable ctx must be initialized to use this + * function. + */ +bool RKNPU2Backend::InitInputAndOutputNumber() { + if (io_num_init_) { + FDERROR << "The private variable io_num_ has been initialized." + << std::endl; + return false; + } + int ret = RKNN_SUCC; + ret = rknn_query(ctx_, RKNN_QUERY_IN_OUT_NUM, &io_num_, sizeof(io_num_)); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_query) failed! ret=" << ret << std::endl; + return false; + } + io_num_init_ = true; + return true; +} + +/* + * @name InitRKNNTensorAddress + * @brief Allocate memory for input_attrs_ and output_attrs_. + * @param None + * @return bool + * @note None + */ +bool RKNPU2Backend::InitRKNNTensorAddress() { + if (tensor_attrs_init_) { + FDERROR << "Private variable input_attrs_ and output_attrs_ memory has " + "been allocated. 
Please do not allocate memory repeatedly or " + "memory leak may occur." + << std::endl; + return false; + } + + if (!io_num_init_) { + InitInputAndOutputNumber(); + } + + if (io_num_.n_input == 0) { + FDERROR << "The number of input tensors is 0." << std::endl; + return false; + } + + if (io_num_.n_output == 0) { + FDERROR << "The number of output tensors is 0." << std::endl; + return false; + } + + // Allocate memory for private variable input_attrs_. + input_attrs_ = + (rknn_tensor_attr *)malloc(sizeof(rknn_tensor_attr) * io_num_.n_input); + memset(input_attrs_, 0, io_num_.n_input * sizeof(rknn_tensor_attr)); + for (uint32_t i = 0; i < io_num_.n_input; i++) { + int ret = RKNN_SUCC; + input_attrs_[i].index = i; + ret = rknn_query(ctx_, RKNN_QUERY_INPUT_ATTR, &(input_attrs_[i]), + sizeof(rknn_tensor_attr)); + + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_query) failed! ret=" << ret << std::endl; + return false; + } + + if ((input_attrs_[i].fmt != RKNN_TENSOR_NHWC) && + (input_attrs_[i].fmt != RKNN_TENSOR_UNDEFINED)) { + FDERROR << "rknpu2_backend only support input format is NHWC or UNDEFINED" + << std::endl; + return false; + } + + DumpTensorAttr(input_attrs_[i]); + } + + // Allocate memory for private variable output_attrs_. + output_attrs_ = + (rknn_tensor_attr *)malloc(sizeof(rknn_tensor_attr) * io_num_.n_output); + memset(output_attrs_, 0, io_num_.n_output * sizeof(rknn_tensor_attr)); + for (uint32_t i = 0; i < io_num_.n_output; i++) { + int ret = RKNN_SUCC; + output_attrs_[i].index = i; + ret = rknn_query(ctx_, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs_[i]), + sizeof(rknn_tensor_attr)); + + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_query) failed! ret=" << ret << std::endl; + return false; + } + + // UltraInfer Only support postprocess when output type is fp32, + // so output_attrs_.type needs to be fixed as RKNN_TENSOR_FLOAT32. + output_attrs_[i].type = RKNN_TENSOR_FLOAT32; + DumpTensorAttr(output_attrs_[i]); + } + tensor_attrs_init_ = true; + return true; +} + +/* + * @name InitInputAndOutputInformation + * @brief Get the detailed input and output information of Model + * @param None + * @return bool + * @note None + */ +bool RKNPU2Backend::InitInputAndOutputInformation() { + if (!io_num_init_) { + InitInputAndOutputNumber(); + } + + if (!tensor_attrs_init_) { + InitRKNNTensorAddress(); + } + + if (io_num_.n_input == 0) { + FDERROR << "The number of input tensors is 0." << std::endl; + return false; + } + + if (io_num_.n_output == 0) { + FDERROR << "The number of output tensors is 0." << std::endl; + return false; + } + + inputs_desc_.resize(io_num_.n_input); + outputs_desc_.resize(io_num_.n_output); + + // Get input info and copy to input tensor info + for (uint32_t i = 0; i < io_num_.n_input; i++) { + // Copy input_attrs_ to input tensor info + std::string temp_name = input_attrs_[i].name; + std::vector temp_shape{}; + temp_shape.resize(input_attrs_[i].n_dims); + for (int j = 0; j < input_attrs_[i].n_dims; j++) { + temp_shape[j] = (int)input_attrs_[i].dims[j]; + } + FDDataType temp_dtype = + ultrainfer::RKNPU2Backend::RknnTensorTypeToFDDataType( + input_attrs_[i].type); + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + inputs_desc_[i] = temp_input_info; + } + + for (uint32_t i = 0; i < io_num_.n_output; i++) { + // If the output dimension is 3, the runtime will automatically change it + // to 4. Obviously, this is wrong, and manual correction is required here. 
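// For example, a 3-D output exported as [1, 8400, 4] may be reported by the
// runtime as [1, 8400, 4, 1]; the trailing 1 is dropped below.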
+ int n_dims = static_cast(output_attrs_[i].n_dims); + if ((n_dims == 4) && (output_attrs_[i].dims[3] == 1)) { + n_dims--; + } + + // Copy output_attrs_ to output tensor + std::string temp_name = output_attrs_[i].name; + std::vector temp_shape{}; + temp_shape.resize(n_dims); + for (int j = 0; j < n_dims; j++) { + temp_shape[j] = (int)output_attrs_[i].dims[j]; + } + + // The data type of output data is changed to FP32 + FDDataType temp_dtype = FDDataType::FP32; + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + outputs_desc_[i] = temp_input_info; + } + return true; +} + +/* + * @name DumpTensorAttr + * @brief Get the model's detailed inputs and outputs + * @param rknn_tensor_attr + * @return None + * @note None + */ +void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr &attr) { + printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], " + "n_elems=%d, size=%d, fmt=%s, type=%s, " + "qnt_type=%s, zp=%d, scale=%f, pass_through=%d\n", + attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1], + attr.dims[2], attr.dims[3], attr.n_elems, attr.size, + get_format_string(attr.fmt), get_type_string(attr.type), + get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale, + attr.pass_through); +} + +TensorInfo RKNPU2Backend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()) + return inputs_desc_[index]; +} + +std::vector RKNPU2Backend::GetInputInfos() { return inputs_desc_; } + +TensorInfo RKNPU2Backend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs %d.", index, + NumOutputs()) + return outputs_desc_[index]; +} + +std::vector RKNPU2Backend::GetOutputInfos() { + return outputs_desc_; +} + +/* + * @name InitRKNNTensorMemory + * @brief Allocate memory for input and output tensors. + * @param std::vector& inputs + * @return None + * @note None + */ +bool RKNPU2Backend::InitRKNNTensorMemory(std::vector &inputs) { + if (tensor_memory_init_) { + FDERROR << "Private variable input_mems_ and output_mems_ memory has " + "been allocated. Please do not allocate memory repeatedly or " + "memory leak may occur." + << std::endl; + return false; + } + int ret = RKNN_SUCC; + input_mems_.resize(io_num_.n_input); + output_mems_.resize(io_num_.n_output); + for (uint32_t i = 0; i < io_num_.n_input; i++) { + // Judge whether the input and output types are the same + rknn_tensor_type input_type = + ultrainfer::RKNPU2Backend::FDDataTypeToRknnTensorType(inputs[i].dtype); + if (input_type != input_attrs_[i].type) { + FDWARNING << "The input tensor type != model's inputs type." + << "The input_type need " + << get_type_string(input_attrs_[i].type) << ",but inputs[" << i + << "].type is " << get_type_string(input_type) << std::endl; + } + + // Create input tensor memory + input_attrs_[i].type = input_type; + input_attrs_[i].size = inputs[i].Nbytes(); + input_attrs_[i].size_with_stride = inputs[i].Nbytes(); + + input_mems_[i] = rknn_create_mem(ctx_, inputs[i].Nbytes()); + if (input_mems_[i] == nullptr) { + FDERROR << "The function(rknn_create_mem) failed! ret=" << ret + << std::endl; + return false; + } + + // Set input tensor memory + ret = rknn_set_io_mem(ctx_, input_mems_[i], &input_attrs_[i]); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_set_io_mem) failed! ret=" << ret + << std::endl; + return false; + } + } + + for (uint32_t i = 0; i < io_num_.n_output; ++i) { + // Most post-processing does not support the fp16 format. 
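// Output types were forced to RKNN_TENSOR_FLOAT32 in InitRKNNTensorAddress(),
// so each output buffer is sized as n_elems * sizeof(float).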
+ uint32_t output_size = output_attrs_[i].n_elems * sizeof(float); + output_mems_[i] = rknn_create_mem(ctx_, output_size); + if (output_mems_[i] == nullptr) { + FDERROR << "The function(rknn_create_mem) failed! ret=" << ret + << std::endl; + return false; + } + + // Set output tensor memory + ret = rknn_set_io_mem(ctx_, output_mems_[i], &output_attrs_[i]); + if (ret != RKNN_SUCC) { + FDERROR << "The function(rknn_set_io_mem) failed! ret=" << ret + << std::endl; + return false; + } + } + + tensor_memory_init_ = true; + return true; +} + +bool RKNPU2Backend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + if (!tensor_memory_init_) { + if (!InitRKNNTensorMemory(inputs)) { + FDERROR << "Init tensor memory failed." << std::endl; + } + } + + int ret = RKNN_SUCC; + // Judge whether the input and output size are the same + if (inputs.size() != inputs_desc_.size()) { + FDERROR << "[RKNPU2Backend] Size of the inputs(" << inputs.size() + << ") should keep same with the inputs of this model(" + << inputs_desc_.size() << ")." << std::endl; + return false; + } + + // Copy input data to input tensor memory + for (uint32_t i = 0; i < io_num_.n_input; i++) { + uint32_t width = input_attrs_[i].dims[2]; + uint32_t stride = input_attrs_[i].w_stride; + if (width == stride) { + if (inputs[i].Data() == nullptr) { + FDERROR << "inputs[0].Data is NULL." << std::endl; + return false; + } + memcpy(input_mems_[i]->virt_addr, inputs[i].Data(), inputs[i].Nbytes()); + } else { + FDERROR << "[RKNPU2Backend] only support width == stride." << std::endl; + return false; + } + } + + // run rknn + ret = rknn_run(ctx_, nullptr); + if (ret != RKNN_SUCC) { + FDERROR << "rknn run error! ret=" << ret << std::endl; + return false; + } + + // get result + outputs->resize(outputs_desc_.size()); + std::vector temp_shape(4); + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + temp_shape.resize(outputs_desc_[i].shape.size()); + for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) { + temp_shape[j] = outputs_desc_[i].shape[j]; + } + (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype, + outputs_desc_[i].name); + memcpy((*outputs)[i].MutableData(), (float *)output_mems_[i]->virt_addr, + (*outputs)[i].Nbytes()); + } + + return true; +} + +/* + * @name RknnTensorTypeToFDDataType + * @brief Change RknnTensorType To FDDataType + * @param rknn_tensor_type + * @return None + * @note Most post-processing does not support the fp16 format. + * Therefore, if the input is FP16, the output will be FP32. 
+ */ +FDDataType RKNPU2Backend::RknnTensorTypeToFDDataType(rknn_tensor_type type) { + if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT16) { + return FDDataType::FP32; + } + if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT32) { + return FDDataType::FP32; + } + if (type == rknn_tensor_type::RKNN_TENSOR_INT8) { + return FDDataType::INT8; + } + if (type == rknn_tensor_type::RKNN_TENSOR_INT16) { + return FDDataType::INT16; + } + if (type == rknn_tensor_type::RKNN_TENSOR_INT32) { + return FDDataType::INT32; + } + if (type == rknn_tensor_type::RKNN_TENSOR_UINT8) { + return FDDataType::UINT8; + } + if (type == rknn_tensor_type::RKNN_TENSOR_BOOL) { + return FDDataType::BOOL; + } + FDERROR << "FDDataType don't support this type" << std::endl; + return FDDataType::UNKNOWN1; +} + +/* + * @name FDDataTypeToRknnTensorType + * @brief Change FDDataType To RknnTensorType + * @param FDDataType + * @return None + * @note None + */ +rknn_tensor_type +RKNPU2Backend::FDDataTypeToRknnTensorType(ultrainfer::FDDataType type) { + if (type == FDDataType::FP16) { + return rknn_tensor_type::RKNN_TENSOR_FLOAT16; + } + if (type == FDDataType::FP32) { + return rknn_tensor_type::RKNN_TENSOR_FLOAT32; + } + if (type == FDDataType::INT8) { + return rknn_tensor_type::RKNN_TENSOR_INT8; + } + if (type == FDDataType::INT16) { + return rknn_tensor_type::RKNN_TENSOR_INT16; + } + if (type == FDDataType::INT32) { + return rknn_tensor_type::RKNN_TENSOR_INT32; + } + if (type == FDDataType::UINT8) { + return rknn_tensor_type::RKNN_TENSOR_UINT8; + } + if (type == FDDataType::BOOL) { + return rknn_tensor_type::RKNN_TENSOR_BOOL; + } + FDERROR << "rknn_tensor_type don't support this type" << std::endl; + return RKNN_TENSOR_TYPE_MAX; +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h new file mode 100755 index 0000000000..d408a9951b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h @@ -0,0 +1,180 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
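The two conversion helpers above, together with the FP32 copy performed in Infer, mean that every tensor an RKNPU2Backend returns holds float data, even when the compiled RKNN graph runs in fp16. The following is a small illustrative sketch of what downstream code may therefore assume; the helper is hypothetical and only relies on the FDTensor::Data()/Nbytes() accessors already used in this backend.

#include "ultrainfer/core/fd_tensor.h"

// Illustrative helper (not from this patch): RKNPU2 outputs are always FP32,
// so the raw buffer can be read as float regardless of the on-device precision.
static float MaxScore(ultrainfer::FDTensor &output) {
  const float *data = static_cast<const float *>(output.Data());
  const size_t n = output.Nbytes() / sizeof(float);
  if (n == 0) {
    return 0.0f;
  }
  float best = data[0];
  for (size_t i = 1; i < n; ++i) {
    if (data[i] > best) {
      best = data[i];
    }
  }
  return best;
}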
+#pragma once + +#include "rknn_api.h" // NOLINT +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/rknpu2/option.h" +#include +#include +#include +#include +#include + +namespace ultrainfer { +class RKNPU2Backend : public BaseBackend { +public: + /***************************** BaseBackend API *****************************/ + RKNPU2Backend() = default; + virtual ~RKNPU2Backend(); + bool Init(const RuntimeOption &runtime_option); + int NumInputs() const override { + return static_cast(inputs_desc_.size()); + } + int NumOutputs() const override { + return static_cast(outputs_desc_.size()); + } + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + /***************************** BaseBackend API *****************************/ + +private: + /* + * @name RuntimeOptionIsApplicable + * @brief This function is used to determine whether the RuntimeOption + * meets the operating conditions of RKNPU2. + * @param None + * @return bool + * @note None + */ + bool RuntimeOptionIsApplicable(const RuntimeOption &runtime_option); + + /* + * @name LoadModel + * @brief Read the model and initialize rknn context. + * @param model: Binary data for the RKNN model or the path of RKNN + * model. + * @return bool + * @note None + */ + bool LoadModel(void *model); + + /* + * @name GetSDKAndDeviceVersion + * @brief Get RKNPU2 sdk and device version. + * @param None + * @return bool + * @note The private variable ctx must be initialized to use this + * function. + */ + bool GetSDKAndDeviceVersion(); + + /* + * @name BuildOption + * @brief Save option and set core mask. + * @param RKNPU2BackendOption + * @note None + */ + void BuildOption(const RKNPU2BackendOption &option); + + /* + * @name SetCoreMask + * @brief Set NPU core for model + * @param core_mask: The specification of NPU core setting. + * @return bool + * @note Only support RK3588 + */ + bool SetCoreMask(const rknpu2::CoreMask &core_mask) const; + + /* + * @name InitInputAndOutputNumber + * @brief Initialize io_num_. + * @param + * @return bool + * @note The private variable ctx must be initialized to use this + * function. + */ + bool InitInputAndOutputNumber(); + + /* + * @name InitRKNNTensorAddress + * @brief Allocate memory for input_attrs_ and output_attrs_. + * @param None + * @return bool + * @note None + */ + bool InitRKNNTensorAddress(); + + /* + * @name InitInputAndOutputInformation + * @brief Initialize inputs_desc_ and outputs_desc_. + * @param None + * @return bool + * @note None + */ + bool InitInputAndOutputInformation(); + + /* + * @name InitRKNNTensorMemory + * @brief Allocate memory for input and output tensors. 
+ * @param std::vector<FDTensor> &inputs
+ * @return bool
+ * @note None
+ */
+  bool InitRKNNTensorMemory(std::vector<FDTensor> &inputs);
+
+  rknn_context ctx_{};
+  rknn_sdk_version sdk_ver_{};
+
+  rknn_input_output_num io_num_{0, 0};
+
+  std::vector<TensorInfo> inputs_desc_;
+  std::vector<TensorInfo> outputs_desc_;
+
+  rknn_tensor_attr *input_attrs_ = nullptr;
+  rknn_tensor_attr *output_attrs_ = nullptr;
+
+  std::vector<rknn_tensor_mem *> input_mems_;
+  std::vector<rknn_tensor_mem *> output_mems_;
+
+  bool io_num_init_ = false;
+  bool tensor_attrs_init_ = false;
+  bool tensor_memory_init_ = false;
+
+  RKNPU2BackendOption option_;
+
+  /*
+   * @name DumpTensorAttr
+   * @brief Print the detailed attributes of a model input or output tensor.
+   * @param rknn_tensor_attr
+   * @return None
+   * @note None
+   */
+  void DumpTensorAttr(rknn_tensor_attr &attr);
+
+  /*
+   * @name RknnTensorTypeToFDDataType
+   * @brief Convert rknn_tensor_type to FDDataType.
+   * @param rknn_tensor_type
+   * @return FDDataType
+   * @note Most post-processing does not support the fp16 format.
+   *       Therefore, if the input is FP16, the output will be FP32.
+   */
+  FDDataType RknnTensorTypeToFDDataType(rknn_tensor_type type);
+
+  /*
+   * @name FDDataTypeToRknnTensorType
+   * @brief Convert FDDataType to rknn_tensor_type.
+   * @param FDDataType
+   * @return rknn_tensor_type
+   * @note None
+   */
+  rknn_tensor_type FDDataTypeToRknnTensorType(FDDataType type);
+};
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_config_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_config_pybind.cc
new file mode 100755
index 0000000000..2e0a49eded
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/runtime/backends/rknpu2/rknpu2_config_pybind.cc
@@ -0,0 +1,37 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
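The binding code below only exposes the rknpu2::CpuName and rknpu2::CoreMask enums to Python; on the C++ side the same values are consumed through RuntimeOption. The following is a minimal configuration sketch rather than a snippet from this patch: the model path is a placeholder, and the exact UseRKNPU2 and SetModelPath parameter lists are assumed from the use_rknpu2 and set_model_path bindings registered later in option_pybind.cc.

#include "ultrainfer/runtime/runtime.h"

int main() {
  ultrainfer::RuntimeOption option;
  // "picodet.rknn" is a placeholder; RKNN models carry no separate params file.
  option.SetModelPath("picodet.rknn", "", ultrainfer::ModelFormat::RKNN);
  // Assumed overload: select the target SoC and pin the model to NPU core 0.
  option.UseRKNPU2(ultrainfer::rknpu2::CpuName::RK3588,
                   ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_0);

  ultrainfer::Runtime runtime;
  if (!runtime.Init(option)) {
    return 1;
  }
  return 0;
}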
+#include "ultrainfer/pybind/main.h" +#include "ultrainfer/runtime/backends/rknpu2/option.h" +namespace ultrainfer { +void BindRKNPU2Option(pybind11::module &m) { + pybind11::enum_( + m, "CpuName", pybind11::arithmetic(), "CpuName for inference.") + .value("RK356X", ultrainfer::rknpu2::CpuName::RK356X) + .value("RK3588", ultrainfer::rknpu2::CpuName::RK3588) + .value("UNDEFINED", ultrainfer::rknpu2::CpuName::UNDEFINED); + pybind11::enum_( + m, "CoreMask", pybind11::arithmetic(), "CoreMask for inference.") + .value("RKNN_NPU_CORE_AUTO", + ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO) + .value("RKNN_NPU_CORE_0", ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_0) + .value("RKNN_NPU_CORE_1", ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_1) + .value("RKNN_NPU_CORE_2", ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_2) + .value("RKNN_NPU_CORE_0_1", + ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_0_1) + .value("RKNN_NPU_CORE_0_1_2", + ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2) + .value("RKNN_NPU_CORE_UNDEFINED", + ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/option.h new file mode 100755 index 0000000000..9c9d079237 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/option.h @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "ultrainfer/core/fd_type.h" +#include +#include +#include +#include +#include + +namespace ultrainfer { +struct SophgoBackendOption {}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.cc new file mode 100755 index 0000000000..170755d068 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.cc @@ -0,0 +1,304 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "ultrainfer/runtime/backends/sophgo/sophgo_backend.h" + +#include + +namespace ultrainfer { +SophgoBackend::~SophgoBackend() { bm_dev_free(handle_); } +/*************************************************************** + * @name GetSDKAndDeviceVersion + * @brief get Sophgo sdk and device version + * @param None + * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::GetSDKAndDeviceVersion() { return true; } + +/*************************************************************** + * @name Init + * @brief Initialize Sophgo model + * @param model_file: Binary data for the Sophgo model. + * params_file: None + * option: config + * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::Init(const RuntimeOption &option) { + if (option.model_from_memory_) { + FDERROR << "SophgoBackend doesn't support load model from memory, please " + "load model from disk." + << std::endl; + return false; + } + if (option.model_format != ModelFormat::SOPHGO) { + FDERROR << "SophgoBackend only supports model format SOPHGO, but now it's " + << option.model_format << "." << std::endl; + return false; + } + if (option.device != Device::SOPHGOTPUD) { + FDERROR << "SophgoBackend only supports device::SOPHGOTPUD, but now it's " + << option.device << "." << std::endl; + return false; + } + + std::string model_file = option.model_file; + + // LoadModel + if (!this->LoadModel((char *)model_file.data())) { + FDERROR << "load model failed" << std::endl; + return false; + } + + // GetSDKAndDeviceVersion + if (!this->GetSDKAndDeviceVersion()) { + FDERROR << "get SDK and device version failed" << std::endl; + return false; + } + + // GetModelInputOutputInfos + if (!this->GetModelInputOutputInfos()) { + FDERROR << "get model input output infos failed" << std::endl; + return false; + } + + return true; +} + +/*************************************************************** + * @name LoadModel + * @brief read Sophgo bmodel + * @param model: Binary data for the Sophgo model. 
+ * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::LoadModel(void *model) { + unsigned int card_num = 0; + bm_status_t status = bm_get_card_num(&card_num); + status = bm_dev_request(&handle_, 0); + p_bmrt_ = bmrt_create(handle_); + assert(NULL != p_bmrt_); + + bool load_status = bmrt_load_bmodel(p_bmrt_, (char *)model); + assert(load_status); + + int network_num = bmrt_get_network_number(p_bmrt_); + + const char **net_names = NULL; + bmrt_get_network_names(p_bmrt_, &net_names); + net_name_ = net_names[0]; + free(net_names); + + net_info_ = bmrt_get_network_info(p_bmrt_, net_name_.c_str()); + assert(NULL != net_info_); + + return true; +} + +/*************************************************************** + * @name GetModelInputOutputInfos + * @brief Get the detailed input and output infos of Model + * @param None + * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::GetModelInputOutputInfos() { + inputs_desc_.resize(net_info_->input_num); + bm_shape_t *input_shapes = net_info_->stages->input_shapes; + for (int idx = 0; idx < net_info_->input_num; idx++) { + std::string temp_name = (net_info_->input_names)[idx]; + std::vector temp_shape{}; + temp_shape.resize(input_shapes[idx].num_dims); + for (int i = 0; i < input_shapes[idx].num_dims; i++) { + temp_shape[i] = input_shapes[idx].dims[i]; + } + bm_data_type_t *input_dtypes = net_info_->input_dtypes; + // SophgoType to FDDataType + FDDataType temp_dtype = SophgoTensorTypeToFDDataType(*input_dtypes); + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + inputs_desc_[idx] = temp_input_info; + } + + outputs_desc_.resize(net_info_->output_num); + bm_shape_t *output_shapes = net_info_->stages->output_shapes; + for (int idx = 0; idx < net_info_->output_num; idx++) { + std::string temp_name1 = (net_info_->output_names)[idx]; + std::vector temp_shape1{}; + temp_shape1.resize(output_shapes[idx].num_dims); + for (int i = 0; i < output_shapes[idx].num_dims; i++) { + temp_shape1[i] = output_shapes[idx].dims[i]; + } + bm_data_type_t *output_dtypes = net_info_->output_dtypes; + // SophgoType to FDDataType + FDDataType temp_dtype1 = SophgoTensorTypeToFDDataType(*output_dtypes); + TensorInfo temp_output_info = {temp_name1, temp_shape1, temp_dtype1}; + outputs_desc_[idx] = temp_output_info; + } + return true; +} + +TensorInfo SophgoBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()) + return inputs_desc_[index]; +} + +std::vector SophgoBackend::GetInputInfos() { return inputs_desc_; } + +TensorInfo SophgoBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs %d.", index, + NumOutputs()) + return outputs_desc_[index]; +} + +std::vector SophgoBackend::GetOutputInfos() { + return outputs_desc_; +} + +bool SophgoBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + int input_size = inputs.size(); + assert(input_size != 0); + assert(input_size == NumInputs()); + bm_tensor_t input_tensors[input_size]; + bm_status_t status = BM_SUCCESS; + + RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN + bm_data_type_t *input_dtypes = net_info_->input_dtypes; + for (int i = 0; i < input_size; i++) { + status = bm_malloc_device_byte(handle_, &input_tensors[i].device_mem, + net_info_->max_input_bytes[i]); + assert(BM_SUCCESS == status); + 
input_tensors[i].dtype = input_dtypes[i]; + input_tensors[i].st_mode = BM_STORE_1N; + input_tensors[i].shape = net_info_->stages[0].input_shapes[i]; + unsigned int input_byte = bmrt_tensor_bytesize(&input_tensors[i]); + bm_memcpy_s2d_partial(handle_, input_tensors[i].device_mem, + (void *)inputs[i].Data(), + bmrt_tensor_bytesize(&input_tensors[i])); + } + + int output_size = NumOutputs(); + bm_tensor_t output_tensors[output_size]; + for (int i = 0; i < output_size; i++) { + status = bm_malloc_device_byte(handle_, &output_tensors[i].device_mem, + net_info_->max_output_bytes[i]); + assert(BM_SUCCESS == status); + } + + RUNTIME_PROFILE_LOOP_BEGIN(1) + bool launch_status = bmrt_launch_tensor_ex( + p_bmrt_, net_name_.c_str(), input_tensors, net_info_->input_num, + output_tensors, net_info_->output_num, true, false); + assert(launch_status); + status = bm_thread_sync(handle_); + assert(status == BM_SUCCESS); + RUNTIME_PROFILE_LOOP_END + + outputs->resize(outputs_desc_.size()); + bm_data_type_t *output_dtypes = net_info_->output_dtypes; + for (int i = 0; i < output_size; i++) { + int temp_bytesize = bmrt_tensor_bytesize(&output_tensors[i]); // Byte + float *temp_out = (float *)malloc(temp_bytesize); + bm_memcpy_d2s_partial(handle_, temp_out, output_tensors[i].device_mem, + temp_bytesize); + + std::vector temp_shape; + temp_shape.resize(outputs_desc_[i].shape.size()); + for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) { + temp_shape[j] = outputs_desc_[i].shape[j]; + } + (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype, + outputs_desc_[i].name); + + memcpy((*outputs)[i].MutableData(), temp_out, (*outputs)[i].Nbytes()); + free(temp_out); + } + + for (int i = 0; i < input_size; i++) { + bm_free_device(handle_, input_tensors[i].device_mem); + } + for (int i = 0; i < output_size; i++) { + bm_free_device(handle_, output_tensors[i].device_mem); + } + RUNTIME_PROFILE_LOOP_H2D_D2H_END + + return true; +} + +/*************************************************************** + * @name SophgoTensorTypeToFDDataType + * @brief Change SophgoTensorType To FDDataType + * @param bm_data_type_t + * @return None + * @note None + ***************************************************************/ +FDDataType SophgoBackend::SophgoTensorTypeToFDDataType(bm_data_type_t type) { + if (type == BM_FLOAT16) { + return FDDataType::FP32; + } + if (type == BM_FLOAT32) { + return FDDataType::FP32; + } + if (type == BM_INT8) { + return FDDataType::INT8; + } + if (type == BM_INT16) { + return FDDataType::INT16; + } + if (type == BM_INT32) { + return FDDataType::INT32; + } + if (type == BM_UINT8) { + return FDDataType::UINT8; + } + FDERROR << "FDDataType don't support this type" << std::endl; + return FDDataType::UNKNOWN1; +} + +/*************************************************************** + * @name FDDataTypeToSophgoTensorType + * @brief Change FDDataType To SophgoTensorType + * @param FDDataType + * @return None + * @note None + ***************************************************************/ +// Sophgo_tensor_type +bm_data_type_t +SophgoBackend::FDDataTypeToSophgoTensorType(ultrainfer::FDDataType type) { + if (type == FDDataType::FP16) { + return BM_FLOAT16; + } + if (type == FDDataType::FP32) { + return BM_FLOAT32; + } + if (type == FDDataType::INT8) { + return BM_INT8; + } + if (type == FDDataType::INT16) { + return BM_INT16; + } + if (type == FDDataType::INT32) { + return BM_INT32; + } + if (type == FDDataType::UINT8) { + return BM_UINT8; + } + FDERROR << "Sophgo_tensor_type don't support this type" << std::endl; + 
return BM_FLOAT32; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.h new file mode 100755 index 0000000000..fc06faeb63 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/sophgo/sophgo_backend.h @@ -0,0 +1,71 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "bmlib_runtime.h" // NOLINT +#include "bmruntime_interface.h" // NOLINT +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/backends/sophgo/option.h" +#include +#include +#include +#include +#include + +namespace ultrainfer { + +class SophgoBackend : public BaseBackend { +public: + SophgoBackend() = default; + virtual ~SophgoBackend(); + bool Init(const RuntimeOption &option); + + int NumInputs() const override { + return static_cast(inputs_desc_.size()); + } + + int NumOutputs() const override { + return static_cast(outputs_desc_.size()); + } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + +private: + bool LoadModel(void *model); + bool GetSDKAndDeviceVersion(); + bool GetModelInputOutputInfos(); + + std::vector inputs_desc_; + std::vector outputs_desc_; + std::string net_name_; + + bm_handle_t handle_; + void *p_bmrt_ = nullptr; + + bool infer_init = false; + + const bm_net_info_t *net_info_ = nullptr; + + // SophgoTPU2BackendOption option_; + + static FDDataType SophgoTensorTypeToFDDataType(bm_data_type_t type); + static bm_data_type_t FDDataTypeToSophgoTensorType(FDDataType type); +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/tvm/option.h b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/option.h new file mode 100755 index 0000000000..c0d6feb672 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/option.h @@ -0,0 +1,21 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
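TVMBackendOption below is deliberately empty: the TVM backend reads everything it needs from the generic RuntimeOption fields, namely a compiled graph-executor library as model_file, the exported parameter blob as params_file, and a CPU device. A hedged configuration sketch with placeholder artifact names follows; the SetModelPath parameter order is assumed from its Python binding.

#include "ultrainfer/runtime/runtime.h"

int main() {
  ultrainfer::RuntimeOption option;
  // Placeholder artifacts: the shared library and parameter blob that
  // TVMBackend::BuildModel loads.
  option.SetModelPath("mobilenet_tvm.so", "mobilenet_tvm.params",
                      ultrainfer::ModelFormat::TVMFormat);
  option.UseCpu();         // BuildDLDevice only accepts Device::CPU
  option.UseTVMBackend();  // bound as use_tvm_backend in the Python API

  ultrainfer::Runtime runtime;
  return runtime.Init(option) ? 0 : 1;
}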
+#pragma once + +namespace ultrainfer { +struct TVMBackendOption { + TVMBackendOption() {} +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.cc b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.cc new file mode 100755 index 0000000000..a3fb414e4e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.cc @@ -0,0 +1,204 @@ +#include "ultrainfer/runtime/backends/tvm/tvm_backend.h" + +#include "yaml-cpp/yaml.h" +namespace ultrainfer { +bool TVMBackend::Init(const ultrainfer::RuntimeOption &runtime_option) { + if (!(Supported(runtime_option.model_format, Backend::TVM) && + Supported(runtime_option.device, Backend::TVM))) { + FDERROR << "TVMBackend only supports model " + "ModelFormat::TVMFormat/Backend::TVM, but now its " + << runtime_option.model_format << "/" << runtime_option.device + << std::endl; + return false; + } + + if (runtime_option.model_from_memory_) { + FDERROR << "TVMBackend doesn't support load model from memory, please " + "load model from disk." + << std::endl; + return false; + } + + if (!BuildDLDevice(runtime_option.device)) { + FDERROR << "TVMBackend only don't support run in this device." << std::endl; + return false; + } + + if (!BuildModel(runtime_option)) { + FDERROR << "TVMBackend only don't support run with this model path." + << std::endl; + return false; + } + + if (!InitInputAndOutputTensor()) { + FDERROR << "InitInputAndOutputTensor failed." << std::endl; + return false; + } + return true; +} + +bool TVMBackend::InitInputAndOutputTensor() { + input_tensor_.resize(NumInputs()); + for (int i = 0; i < NumInputs(); ++i) { + TensorInfo tensor_info = GetInputInfo(i); + tvm::ShapeTuple shape(tensor_info.shape.begin(), tensor_info.shape.end()); + input_tensor_[i] = tvm::runtime::NDArray::Empty( + shape, FDDataTypeToDLDataType(tensor_info.dtype), dev_); + } + + output_tensor_.resize(NumOutputs()); + for (int i = 0; i < NumOutputs(); ++i) { + TensorInfo tensor_info = GetOutputInfo(i); + tvm::ShapeTuple shape(tensor_info.shape.begin(), tensor_info.shape.end()); + output_tensor_[i] = tvm::runtime::NDArray::Empty( + shape, FDDataTypeToDLDataType(tensor_info.dtype), dev_); + } + return true; +} + +bool TVMBackend::BuildModel(const RuntimeOption &runtime_option) { + // load in the library + tvm::runtime::Module mod_factory = + tvm::runtime::Module::LoadFromFile(runtime_option.model_file); + + // create the graph executor module + gmod_ = mod_factory.GetFunction("default")(dev_); + + // load params + std::ifstream params_in(runtime_option.params_file, std::ios::binary); + std::string params_data((std::istreambuf_iterator(params_in)), + std::istreambuf_iterator()); + params_in.close(); + TVMByteArray params_arr; + params_arr.data = params_data.c_str(); + params_arr.size = params_data.length(); + tvm::runtime::PackedFunc load_params = gmod_.GetFunction("load_params"); + load_params(params_arr); + + // read input and output info + tvm::runtime::PackedFunc get_input_info = gmod_.GetFunction("get_input_info"); + tvm::Map input_info = get_input_info(); + auto input_info_shape = tvm::Downcast>( + input_info["shape"]); + inputs_desc_.reserve(input_info_shape.size()); + for (auto map_node : input_info_shape) { + std::string temp_name = map_node.first; + + tvm::ShapeTuple tup = map_node.second; + std::vector temp_shape{}; + temp_shape.resize(tup.size()); + for (int j = 0; j < tup.size(); ++j) { + temp_shape[j] = static_cast(tup[j]); + } + + FDDataType temp_dtype = ultrainfer::UNKNOWN1; + TensorInfo 
temp_input_info = {temp_name, temp_shape, temp_dtype}; + inputs_desc_.emplace_back(temp_input_info); + } + + int input_dtype_index = 0; + auto input_info_dtype = + tvm::Downcast>(input_info["dtype"]); + for (auto map_node : input_info_dtype) { + tvm::String tup = map_node.second; + inputs_desc_[input_dtype_index].dtype = TVMTensorTypeToFDDataType(tup); + input_dtype_index++; + } + + tvm::runtime::PackedFunc get_output_info = + gmod_.GetFunction("get_output_info"); + tvm::Map output_info = get_output_info(); + auto output_info_shape = + tvm::Downcast>( + output_info["shape"]); + outputs_desc_.reserve(output_info_shape.size()); + for (auto map_node : output_info_shape) { + std::string temp_name = map_node.first; + + tvm::ShapeTuple tup = map_node.second; + std::vector temp_shape{}; + temp_shape.resize(tup.size()); + for (int j = 0; j < tup.size(); ++j) { + temp_shape[j] = static_cast(tup[j]); + } + + FDDataType temp_dtype = ultrainfer::FP32; + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + outputs_desc_.emplace_back(temp_input_info); + } + + int output_dtype_index = 0; + auto output_info_dtype = + tvm::Downcast>(output_info["dtype"]); + for (auto map_node : output_info_dtype) { + tvm::String tup = map_node.second; + outputs_desc_[output_dtype_index].dtype = TVMTensorTypeToFDDataType(tup); + output_dtype_index++; + } + return true; +} + +FDDataType TVMBackend::TVMTensorTypeToFDDataType(tvm::String type) { + if (type == "float32") { + return FDDataType::FP32; + } + FDERROR << "FDDataType don't support this type" << std::endl; + return FDDataType::UNKNOWN1; +} + +bool TVMBackend::Infer(std::vector &inputs, + std::vector *outputs, bool copy_to_fd) { + for (int i = 0; i < inputs.size(); ++i) { + memcpy(input_tensor_[i]->data, inputs[i].Data(), inputs[i].Nbytes()); + } + + // get the function from the module(set input data) + tvm::runtime::PackedFunc set_input = gmod_.GetFunction("set_input"); + for (int i = 0; i < NumInputs(); ++i) { + set_input(GetInputInfo(i).name, input_tensor_[i]); + } + + // get the function from the module(run it) + tvm::runtime::PackedFunc run = gmod_.GetFunction("run"); + run(); + + // get the function from the module(get output data) + tvm::runtime::PackedFunc get_output = gmod_.GetFunction("get_output"); + for (int i = 0; i < NumOutputs(); ++i) { + get_output(i, output_tensor_[i]); + } + + // get result + outputs->resize(NumOutputs()); + std::vector temp_shape{}; + for (size_t i = 0; i < outputs_desc_.size(); ++i) { + temp_shape.resize(outputs_desc_[i].shape.size()); + for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) { + temp_shape[j] = outputs_desc_[i].shape[j]; + } + (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype, + outputs_desc_[i].name); + memcpy((*outputs)[i].MutableData(), + static_cast(output_tensor_[i]->data), + (*outputs)[i].Nbytes()); + } + return true; +} + +bool TVMBackend::BuildDLDevice(ultrainfer::Device device) { + if (device == Device::CPU) { + dev_ = DLDevice{kDLCPU, 0}; + } else { + FDERROR << "TVMBackend only support run in CPU." 
<< std::endl; + return false; + } + return true; +} + +DLDataType TVMBackend::FDDataTypeToDLDataType(ultrainfer::FDDataType dtype) { + if (dtype == FDDataType::FP32) { + return DLDataType{kDLFloat, 32, 1}; + } + return {}; +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.h b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.h new file mode 100755 index 0000000000..693c0067f2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/backends/tvm/tvm_backend.h @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/runtime/backends/backend.h" +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace ultrainfer { +class TVMBackend : public BaseBackend { +public: + TVMBackend() = default; + virtual ~TVMBackend() = default; + bool Init(const RuntimeOption &runtime_option) override; + int NumInputs() const override { return inputs_desc_.size(); } + int NumOutputs() const override { return outputs_desc_.size(); } + TensorInfo GetInputInfo(int index) override { return inputs_desc_[index]; } + TensorInfo GetOutputInfo(int index) override { return outputs_desc_[index]; } + std::vector GetInputInfos() override { return inputs_desc_; } + std::vector GetOutputInfos() override { return outputs_desc_; } + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + +private: + DLDevice dev_{}; + tvm::runtime::Module gmod_; + std::vector inputs_desc_; + std::vector outputs_desc_; + + bool BuildDLDevice(Device device); + bool BuildModel(const RuntimeOption &runtime_option); + bool InitInputAndOutputTensor(); + + std::vector input_tensor_; + std::vector output_tensor_; + + FDDataType TVMTensorTypeToFDDataType(tvm::String type); + DLDataType FDDataTypeToDLDataType(FDDataType dtype); +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/enum_variables.cc b/libs/ultrainfer/ultrainfer/runtime/enum_variables.cc new file mode 100755 index 0000000000..2dfcabfd93 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/enum_variables.cc @@ -0,0 +1,145 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
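The definitions that follow back the Backend/Device/ModelFormat enums with stream printers and the availability helpers declared in enum_variables.h. A short illustrative sketch of how calling code can probe the current build, using only functions defined in this file:

#include <iostream>

#include "ultrainfer/runtime/enum_variables.h"

int main() {
  // List every backend compiled into this UltraInfer build.
  for (const auto &backend : ultrainfer::GetAvailableBackends()) {
    std::cout << "available: " << backend << std::endl;
  }
  // Fall back gracefully when a preferred backend was not compiled in.
  if (!ultrainfer::IsBackendAvailable(ultrainfer::Backend::TRT)) {
    std::cout << "TensorRT backend not built; relying on default selection."
              << std::endl;
  }
  return 0;
}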
+ +#include "ultrainfer/runtime/enum_variables.h" + +namespace ultrainfer { +std::ostream &operator<<(std::ostream &out, const Backend &backend) { + if (backend == Backend::ORT) { + out << "Backend::ORT"; + } else if (backend == Backend::TRT) { + out << "Backend::TRT"; + } else if (backend == Backend::PDINFER) { + out << "Backend::PDINFER"; + } else if (backend == Backend::OPENVINO) { + out << "Backend::OPENVINO"; + } else if (backend == Backend::RKNPU2) { + out << "Backend::RKNPU2"; + } else if (backend == Backend::SOPHGOTPU) { + out << "Backend::SOPHGOTPU"; + } else if (backend == Backend::POROS) { + out << "Backend::POROS"; + } else if (backend == Backend::LITE) { + out << "Backend::PDLITE"; + } else if (backend == Backend::HORIZONNPU) { + out << "Backend::HORIZONNPU"; + } else if (backend == Backend::TVM) { + out << "Backend::TVM"; + } else { + out << "UNKNOWN-Backend"; + } + return out; +} + +std::ostream &operator<<(std::ostream &out, const Device &d) { + switch (d) { + case Device::CPU: + out << "Device::CPU"; + break; + case Device::GPU: + out << "Device::GPU"; + break; + case Device::RKNPU: + out << "Device::RKNPU"; + break; + case Device::SUNRISENPU: + out << "Device::SUNRISENPU"; + break; + case Device::SOPHGOTPUD: + out << "Device::SOPHGOTPUD"; + break; + case Device::TIMVX: + out << "Device::TIMVX"; + break; + case Device::KUNLUNXIN: + out << "Device::KUNLUNXIN"; + break; + case Device::ASCEND: + out << "Device::ASCEND"; + break; + case Device::DIRECTML: + out << "Device::DIRECTML"; + break; + default: + out << "Device::UNKOWN"; + } + return out; +} + +std::ostream &operator<<(std::ostream &out, const ModelFormat &format) { + if (format == ModelFormat::PADDLE) { + out << "ModelFormat::PADDLE"; + } else if (format == ModelFormat::ONNX) { + out << "ModelFormat::ONNX"; + } else if (format == ModelFormat::RKNN) { + out << "ModelFormat::RKNN"; + } else if (format == ModelFormat::SOPHGO) { + out << "ModelFormat::SOPHGO"; + } else if (format == ModelFormat::TORCHSCRIPT) { + out << "ModelFormat::TORCHSCRIPT"; + } else if (format == ModelFormat::HORIZON) { + out << "ModelFormat::HORIZON"; + } else if (format == ModelFormat::TVMFormat) { + out << "ModelFormat::TVMFormat"; + } else { + out << "UNKNOWN-ModelFormat"; + } + return out; +} + +std::vector GetAvailableBackends() { + std::vector backends; +#ifdef ENABLE_ORT_BACKEND + backends.push_back(Backend::ORT); +#endif +#ifdef ENABLE_TRT_BACKEND + backends.push_back(Backend::TRT); +#endif +#ifdef ENABLE_PADDLE_BACKEND + backends.push_back(Backend::PDINFER); +#endif +#ifdef ENABLE_POROS_BACKEND + backends.push_back(Backend::POROS); +#endif +#ifdef ENABLE_OPENVINO_BACKEND + backends.push_back(Backend::OPENVINO); +#endif +#ifdef ENABLE_LITE_BACKEND + backends.push_back(Backend::LITE); +#endif +#ifdef ENABLE_RKNPU2_BACKEND + backends.push_back(Backend::RKNPU2); +#endif +#ifdef ENABLE_HORIZON_BACKEND + backends.push_back(Backend::HORIZONNPU); +#endif +#ifdef ENABLE_SOPHGO_BACKEND + backends.push_back(Backend::SOPHGOTPU); +#endif +#ifdef ENABLE_TVM_BACKEND + backends.push_back(Backend::TVM); +#endif + return backends; +} + +bool IsBackendAvailable(const Backend &backend) { + std::vector backends = GetAvailableBackends(); + for (size_t i = 0; i < backends.size(); ++i) { + if (backend == backends[i]) { + return true; + } + } + return false; +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/enum_variables.h b/libs/ultrainfer/ultrainfer/runtime/enum_variables.h new file mode 100755 index 0000000000..3e2f234e30 --- 
/dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/enum_variables.h @@ -0,0 +1,148 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file enum_variables.h + \brief A brief file description. + + More details + */ + +#pragma once +#include "ultrainfer/utils/utils.h" +#include +#include + +namespace ultrainfer { + +/*! Inference backend supported in UltraInfer */ +enum Backend { + UNKNOWN, ///< Unknown inference backend + ORT, //< ONNX Runtime, support Paddle/ONNX format model, + //< CPU/ Nvidia GPU DirectML + TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only + PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU + POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU + OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only + LITE, ///< Paddle Lite, support Paddle format model, ARM CPU / ARM GPU + RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only + SOPHGOTPU, ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only + HORIZONNPU, ///< HORIZONNPU, support Horizon format model, Horizon NPU + TVM, ///< TVMBackend, support TVM format model, CPU / Nvidia GPU +}; + +/** + * @brief Get all the available inference backend in UltraInfer + */ +ULTRAINFER_DECL std::vector GetAvailableBackends(); + +/** + * @brief Check if the inference backend available + */ +ULTRAINFER_DECL bool IsBackendAvailable(const Backend &backend); + +enum ULTRAINFER_DECL Device { + CPU, + GPU, + RKNPU, + IPU, + TIMVX, + KUNLUNXIN, + ASCEND, + SOPHGOTPUD, + DIRECTML, + SUNRISENPU, +}; + +/*! 
Deep learning model format */ +enum ModelFormat { + AUTOREC, ///< Auto recognize the model format by model file name + PADDLE, ///< Model with paddlepaddle format + ONNX, ///< Model with ONNX format + RKNN, ///< Model with RKNN format + TORCHSCRIPT, ///< Model with TorchScript format + SOPHGO, ///< Model with SOPHGO format + HORIZON, ///< Model with HORIZON format + TVMFormat, ///< Model with TVM format +}; + +/// Describle all the supported backends for specified model format +static std::map> + s_default_backends_by_format = { + {ModelFormat::PADDLE, + {Backend::PDINFER, Backend::LITE, Backend::ORT, Backend::OPENVINO, + Backend::TRT}}, + {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}}, + {ModelFormat::RKNN, {Backend::RKNPU2}}, + {ModelFormat::HORIZON, {Backend::HORIZONNPU}}, + {ModelFormat::TORCHSCRIPT, {Backend::POROS}}, + {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}, + {ModelFormat::TVMFormat, {Backend::TVM}}}; + +/// Describle all the supported backends for specified device +static std::map> s_default_backends_by_device = { + {Device::CPU, + {Backend::LITE, Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::POROS, Backend::TVM}}, + {Device::GPU, + {Backend::LITE, Backend::PDINFER, Backend::ORT, Backend::TRT, + Backend::POROS, Backend::TVM}}, + {Device::RKNPU, {Backend::RKNPU2}}, + {Device::SUNRISENPU, {Backend::HORIZONNPU}}, + {Device::IPU, {Backend::PDINFER}}, + {Device::TIMVX, {Backend::LITE}}, + {Device::KUNLUNXIN, {Backend::LITE, Backend::PDINFER}}, + {Device::ASCEND, {Backend::LITE}}, + {Device::SOPHGOTPUD, {Backend::SOPHGOTPU}}, + {Device::DIRECTML, {Backend::ORT}}}; + +inline bool Supported(ModelFormat format, Backend backend) { + auto iter = s_default_backends_by_format.find(format); + if (iter == s_default_backends_by_format.end()) { + FDERROR << "Didn't find format is registered in " + << "s_default_backends_by_format." << std::endl; + return false; + } + for (size_t i = 0; i < iter->second.size(); ++i) { + if (iter->second[i] == backend) { + return true; + } + } + std::string msg = Str(iter->second); + FDERROR << backend << " only supports " << msg << ", but now it's " << format + << "." << std::endl; + return false; +} + +inline bool Supported(Device device, Backend backend) { + auto iter = s_default_backends_by_device.find(device); + if (iter == s_default_backends_by_device.end()) { + FDERROR << "Didn't find device is registered in " + << "s_default_backends_by_device." << std::endl; + return false; + } + for (size_t i = 0; i < iter->second.size(); ++i) { + if (iter->second[i] == backend) { + return true; + } + } + std::string msg = Str(iter->second); + FDERROR << backend << " only supports " << msg << ", but now it's " << device + << "." << std::endl; + return false; +} + +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &o, const Backend &b); +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &o, const Device &d); +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &o, const ModelFormat &f); +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/option_pybind.cc b/libs/ultrainfer/ultrainfer/runtime/option_pybind.cc new file mode 100755 index 0000000000..b2d30df6f7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/option_pybind.cc @@ -0,0 +1,83 @@ +// Cropyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindLiteOption(pybind11::module &m); +void BindOpenVINOOption(pybind11::module &m); +void BindOrtOption(pybind11::module &m); +void BindTrtOption(pybind11::module &m); +void BindPaddleOption(pybind11::module &m); +void BindPorosOption(pybind11::module &m); +void BindRKNPU2Option(pybind11::module &m); +void BindOption(pybind11::module &m) { + BindLiteOption(m); + BindOpenVINOOption(m); + BindOrtOption(m); + BindTrtOption(m); + BindPaddleOption(m); + BindPorosOption(m); + BindRKNPU2Option(m); + + pybind11::class_(m, "RuntimeOption") + .def(pybind11::init()) + .def("set_model_path", &RuntimeOption::SetModelPath) + .def("set_model_buffer", &RuntimeOption::SetModelBuffer) + .def("use_gpu", &RuntimeOption::UseGpu) + .def("use_cpu", &RuntimeOption::UseCpu) + .def("use_rknpu2", &RuntimeOption::UseRKNPU2) + .def("use_sophgo", &RuntimeOption::UseSophgo) + .def("use_ascend", &RuntimeOption::UseAscend) + .def("use_kunlunxin", &RuntimeOption::UseKunlunXin) + .def("disable_valid_backend_check", + &RuntimeOption::DisableValidBackendCheck) + .def("enable_valid_backend_check", + &RuntimeOption::EnableValidBackendCheck) + .def_readwrite("paddle_lite_option", &RuntimeOption::paddle_lite_option) + .def_readwrite("openvino_option", &RuntimeOption::openvino_option) + .def_readwrite("ort_option", &RuntimeOption::ort_option) + .def_readwrite("trt_option", &RuntimeOption::trt_option) + .def_readwrite("poros_option", &RuntimeOption::poros_option) + .def_readwrite("paddle_infer_option", &RuntimeOption::paddle_infer_option) + .def("set_external_stream", &RuntimeOption::SetExternalStream) + .def("set_external_raw_stream", + [](RuntimeOption &self, size_t external_stream) { + self.SetExternalStream(reinterpret_cast(external_stream)); + }) + .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum) + .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend) + .def("use_poros_backend", &RuntimeOption::UsePorosBackend) + .def("use_tvm_backend", &RuntimeOption::UseTVMBackend) + .def("use_ort_backend", &RuntimeOption::UseOrtBackend) + .def("use_trt_backend", &RuntimeOption::UseTrtBackend) + .def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend) + .def("use_lite_backend", &RuntimeOption::UseLiteBackend) + .def("enable_pinned_memory", &RuntimeOption::EnablePinnedMemory) + .def("disable_pinned_memory", &RuntimeOption::DisablePinnedMemory) + .def("use_ipu", &RuntimeOption::UseIpu) + .def("enable_profiling", &RuntimeOption::EnableProfiling) + .def("disable_profiling", &RuntimeOption::DisableProfiling) + .def_readwrite("model_file", &RuntimeOption::model_file) + .def_readwrite("params_file", &RuntimeOption::params_file) + .def_readwrite("model_format", &RuntimeOption::model_format) + .def_readwrite("backend", &RuntimeOption::backend) + .def_readwrite("external_stream", &RuntimeOption::external_stream_) + .def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_) + .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num) + .def_readwrite("device_id", &RuntimeOption::device_id) + .def_readwrite("device", 
&RuntimeOption::device); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/runtime.cc b/libs/ultrainfer/ultrainfer/runtime/runtime.cc new file mode 100755 index 0000000000..f6c7cf768e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/runtime.cc @@ -0,0 +1,431 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/runtime/runtime.h" + +#include +#include +#include +#include + +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/utils/utils.h" +#include "yaml-cpp/yaml.h" + +#ifdef ENABLE_ORT_BACKEND +#include "ultrainfer/runtime/backends/ort/ort_backend.h" +#endif + +#ifdef ENABLE_TRT_BACKEND +#include "ultrainfer/runtime/backends/tensorrt/trt_backend.h" +#endif + +#ifdef ENABLE_PADDLE_BACKEND +#include "ultrainfer/runtime/backends/paddle/paddle_backend.h" +#endif + +#ifdef ENABLE_POROS_BACKEND +#include "ultrainfer/runtime/backends/poros/poros_backend.h" +#endif + +#ifdef ENABLE_OPENVINO_BACKEND +#include "ultrainfer/runtime/backends/openvino/ov_backend.h" +#endif + +#ifdef ENABLE_LITE_BACKEND +#include "ultrainfer/runtime/backends/lite/lite_backend.h" +#endif + +#ifdef ENABLE_RKNPU2_BACKEND +#include "ultrainfer/runtime/backends/rknpu2/rknpu2_backend.h" +#endif + +#ifdef ENABLE_SOPHGO_BACKEND +#include "ultrainfer/runtime/backends/sophgo/sophgo_backend.h" +#endif + +#ifdef ENABLE_HORIZON_BACKEND +#include "ultrainfer/runtime/backends/horizon/horizon_backend.h" +#endif + +#ifdef ENABLE_TVM_BACKEND +#include "ultrainfer/runtime/backends/tvm/tvm_backend.h" +#endif + +namespace ultrainfer { + +bool AutoSelectBackend(RuntimeOption &option) { + auto iter0 = s_default_backends_by_format.find(option.model_format); + if (iter0 == s_default_backends_by_format.end()) { + FDERROR << "Cannot found a default backend for model format: " + << option.model_format + << ", please define the inference backend in RuntimeOption." + << std::endl; + return false; + } + + auto iter1 = s_default_backends_by_device.find(option.device); + if (iter1 == s_default_backends_by_device.end()) { + FDERROR << "Cannot found a default backend for device: " << option.device + << ", please define the inference backend in RuntimeOption." + << std::endl; + return false; + } + + std::vector candidates; + for (const auto &b0 : iter0->second) { + for (const auto &b1 : iter1->second) { + if (b0 == b1) { + candidates.push_back(b0); + } + } + } + + if (candidates.size() == 0) { + FDERROR << "Cannot found availabel inference backends by model format: " + << option.model_format << " with device: " << option.device + << std::endl; + return false; + } + + for (const auto &b : candidates) { + if (IsBackendAvailable(b)) { + option.backend = b; + FDINFO << "UltraInfer will choose " << b << " to inference this model." 
+ << std::endl; + return true; + } + } + std::string debug_message = Str(candidates); + FDERROR << "The candiate backends for " << option.model_format << " & " + << option.device << " are " << debug_message + << ", but both of them have not been compiled with current " + "UltraInfer yet." + << std::endl; + return false; +} + +bool Runtime::Init(const RuntimeOption &_option) { + option = _option; + + // Choose default backend by model format and device if backend is not + // specified + if (option.backend == Backend::UNKNOWN) { + if (!AutoSelectBackend(option)) { + return false; + } + } + + if (option.backend == Backend::ORT) { + CreateOrtBackend(); + } else if (option.backend == Backend::TRT) { + CreateTrtBackend(); + } else if (option.backend == Backend::PDINFER) { + CreatePaddleBackend(); + } else if (option.backend == Backend::OPENVINO) { + CreateOpenVINOBackend(); + } else if (option.backend == Backend::LITE) { + CreateLiteBackend(); + } else if (option.backend == Backend::RKNPU2) { + CreateRKNPU2Backend(); + } else if (option.backend == Backend::SOPHGOTPU) { + CreateSophgoNPUBackend(); + } else if (option.backend == Backend::POROS) { + CreatePorosBackend(); + } else if (option.backend == Backend::HORIZONNPU) { + CreateHorizonBackend(); + } else if (option.backend == Backend::TVM) { + CreateTVMBackend(); + } else { + std::string msg = Str(GetAvailableBackends()); + FDERROR << "The compiled UltraInfer only supports " << msg << ", " + << option.backend << " is not supported now." << std::endl; + return false; + } + backend_->benchmark_option_ = option.benchmark_option; + return true; +} + +TensorInfo Runtime::GetInputInfo(int index) { + return backend_->GetInputInfo(index); +} + +TensorInfo Runtime::GetOutputInfo(int index) { + return backend_->GetOutputInfo(index); +} + +std::vector Runtime::GetInputInfos() { + return backend_->GetInputInfos(); +} + +std::vector Runtime::GetOutputInfos() { + return backend_->GetOutputInfos(); +} + +bool Runtime::Infer(std::vector &input_tensors, + std::vector *output_tensors) { + for (auto &tensor : input_tensors) { + FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id, + "Device id of input tensor(%d) and runtime(%d) are not same.", + tensor.device_id, option.device_id); + } + return backend_->Infer(input_tensors, output_tensors); +} + +bool Runtime::Infer() { + bool result = false; + if (option.device == Device::KUNLUNXIN) { + // FDTensor SetExternalData is not support for Device::KUNLUNXIN + // now, so, we need to set copy_to_fd as 'true'. 
+ result = backend_->Infer(input_tensors_, &output_tensors_, true); + } else { + result = backend_->Infer(input_tensors_, &output_tensors_, false); + } + + for (auto &tensor : output_tensors_) { + tensor.device_id = option.device_id; + } + return result; +} + +void Runtime::BindInputTensor(const std::string &name, FDTensor &input) { + bool is_exist = false; + for (auto &t : input_tensors_) { + if (t.name == name) { + is_exist = true; + t.SetExternalData(input.shape, input.dtype, input.MutableData(), + input.device, input.device_id); + break; + } + } + if (!is_exist) { + FDTensor new_tensor(name); + new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(), + input.device, input.device_id); + input_tensors_.emplace_back(std::move(new_tensor)); + } +} + +void Runtime::BindOutputTensor(const std::string &name, FDTensor &output) { + bool is_exist = false; + for (auto &t : output_tensors_) { + if (t.name == name) { + is_exist = true; + t.SetExternalData(output.shape, output.dtype, output.MutableData(), + output.device, output.device_id); + break; + } + } + if (!is_exist) { + FDTensor new_tensor(name); + new_tensor.SetExternalData(output.shape, output.dtype, output.MutableData(), + output.device, output.device_id); + output_tensors_.emplace_back(std::move(new_tensor)); + } +} +FDTensor *Runtime::GetOutputTensor(const std::string &name) { + for (auto &t : output_tensors_) { + if (t.name == name) { + return &t; + } + } + FDWARNING << "The output name [" << name << "] don't exist." << std::endl; + return nullptr; +} + +void Runtime::ReleaseModelMemoryBuffer() { + if (option.model_from_memory_) { + option.model_file.clear(); + option.model_file.shrink_to_fit(); + option.params_file.clear(); + option.params_file.shrink_to_fit(); + } +} + +void Runtime::CreatePaddleBackend() { +#ifdef ENABLE_PADDLE_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), + "Failed to initialized Paddle Inference backend."); +#else + FDASSERT(false, "PaddleBackend is not available, please compiled with " + "ENABLE_PADDLE_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device + << "." << std::endl; + + const char *pirApiFlag = "FLAGS_enable_pir_api"; + const char *envValue = getenv(pirApiFlag); + if (envValue == nullptr || + (std::string(envValue) != "0" && std::string(envValue) != "False")) { + FDWARNING + << "To ensure the correct operation of the Paddle backend, please set" + "the environment variable 'FLAGS_enable_pir_api' to 'False'." + << std::endl; + } +} + +void Runtime::CreateOpenVINOBackend() { +#ifdef ENABLE_OPENVINO_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize OpenVINOBackend."); +#else + FDASSERT(false, "OpenVINOBackend is not available, please compiled with " + "ENABLE_OPENVINO_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device + << "." << std::endl; +} + +void Runtime::CreateTVMBackend() { +#ifdef ENABLE_TVM_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize TVM backend."); +#else + FDASSERT(false, "TVMBackend is not available, please compiled with " + "ENABLE_TVM_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::TVM in " << option.device << "." 
+ << std::endl; +} + +void Runtime::CreateOrtBackend() { +#ifdef ENABLE_ORT_BACKEND + backend_ = utils::make_unique(); + + FDASSERT(backend_->Init(option), "Failed to initialize Backend::ORT."); +#else + FDASSERT(false, "OrtBackend is not available, please compiled with " + "ENABLE_ORT_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::ORT in " << option.device << "." + << std::endl; +} + +void Runtime::CreateTrtBackend() { +#ifdef ENABLE_TRT_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend."); +#else + FDASSERT(false, "TrtBackend is not available, please compiled with " + "ENABLE_TRT_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::TRT in " << option.device << "." + << std::endl; +} + +void Runtime::CreateLiteBackend() { +#ifdef ENABLE_LITE_BACKEND + backend_ = utils::make_unique(); + + FDASSERT(backend_->Init(option), + "Load model from nb file failed while initializing LiteBackend."); +#else + FDASSERT(false, "LiteBackend is not available, please compiled with " + "ENABLE_LITE_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::PDLITE in " << option.device + << "." << std::endl; +} + +void Runtime::CreateRKNPU2Backend() { +#ifdef ENABLE_RKNPU2_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize RKNPU2 backend."); +#else + FDASSERT(false, "RKNPU2Backend is not available, please compiled with " + "ENABLE_RKNPU2_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device + << "." << std::endl; +} + +void Runtime::CreateHorizonBackend() { +#ifdef ENABLE_HORIZON_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize Horizon backend."); +#else + FDASSERT(false, "HorizonBackend is not available, please compiled with ", + " ENABLE_HORIZON_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::HORIZONNPU in " << option.device + << "." << std::endl; +} + +void Runtime::CreateSophgoNPUBackend() { +#ifdef ENABLE_SOPHGO_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize Sophgo backend."); +#else + FDASSERT(false, "SophgoBackend is not available, please compiled with " + "ENABLE_SOPHGO_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device + << "." << std::endl; +} + +Runtime *Runtime::Clone(void *stream, int device_id) { + Runtime *runtime = new Runtime(); + if (option.backend != Backend::OPENVINO && + option.backend != Backend::PDINFER) { + runtime->Init(option); + FDWARNING << "Only OpenVINO/Paddle Inference support \ + clone engine to reduce CPU/GPU memory usage now. For " + << option.backend + << ", UltraInfer will create a new engine which \ + will not share memory with the current runtime." + << std::endl; + return runtime; + } + FDINFO << "Runtime Clone with Backend:: " << option.backend << " in " + << option.device << "." << std::endl; + runtime->option = option; + runtime->backend_ = backend_->Clone(option, stream, device_id); + return runtime; +} + +void Runtime::CreatePorosBackend() { +#ifdef ENABLE_POROS_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize Poros backend."); +#else + FDASSERT(false, "PorosBackend is not available, please compiled with " + "ENABLE_POROS_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::POROS in " << option.device + << "." 
<< std::endl; +} + +// only for poros backend +bool Runtime::Compile(std::vector> &prewarm_tensors) { +#ifdef ENABLE_POROS_BACKEND + option.poros_option.device = option.device; + option.poros_option.device_id = option.device_id; + option.poros_option.enable_fp16 = option.trt_option.enable_fp16; + option.poros_option.max_batch_size = option.trt_option.max_batch_size; + option.poros_option.max_workspace_size = option.trt_option.max_workspace_size; + + auto casted_backend = dynamic_cast(backend_.get()); + FDASSERT( + casted_backend->Compile(option.model_file, prewarm_tensors, + option.poros_option), + "Load model from Torchscript failed while initliazing PorosBackend."); +#else + FDASSERT(false, "PorosBackend is not available, please compiled with " + "ENABLE_POROS_BACKEND=ON."); +#endif + return true; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/runtime.h b/libs/ultrainfer/ultrainfer/runtime/runtime.h new file mode 100755 index 0000000000..da53567e16 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/runtime.h @@ -0,0 +1,126 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file runtime.h + \brief A brief file description. + + More details + */ + +#pragma once +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/runtime/backends/backend.h" +#include "ultrainfer/runtime/runtime_option.h" +#include "ultrainfer/utils/perf.h" +/** \brief All C++ UltraInfer APIs are defined inside this namespace + * + */ +namespace ultrainfer { + +/*! @brief Runtime object used to inference the loaded model on different + * devices + */ +struct ULTRAINFER_DECL Runtime { +public: + /// Intialize a Runtime object with RuntimeOption + bool Init(const RuntimeOption &_option); + + /** \brief Inference the model by the input data, and write to the output + * + * \param[in] input_tensors Notice the FDTensor::name should keep same with + * the model's input \param[in] output_tensors Inference results \return true + * if the inference successed, otherwise false + */ + bool Infer(std::vector &input_tensors, + std::vector *output_tensors); + + /** \brief No params inference the model. + * + * the input and output data need to pass through the BindInputTensor and + * GetOutputTensor interfaces. 
+ */ + bool Infer(); + + /** \brief Get number of inputs + */ + int NumInputs() { return backend_->NumInputs(); } + /** \brief Get number of outputs + */ + int NumOutputs() { return backend_->NumOutputs(); } + /** \brief Get input information by index + */ + TensorInfo GetInputInfo(int index); + /** \brief Get output information by index + */ + TensorInfo GetOutputInfo(int index); + /** \brief Get all the input information + */ + std::vector GetInputInfos(); + /** \brief Get all the output information + */ + std::vector GetOutputInfos(); + /** \brief Bind FDTensor by name, no copy and share input memory + */ + void BindInputTensor(const std::string &name, FDTensor &input); + + /** \brief Bind FDTensor by name, no copy and share output memory. + * Please make share the correctness of tensor shape of output. + */ + void BindOutputTensor(const std::string &name, FDTensor &output); + + /** \brief Get output FDTensor by name, no copy and share backend output + * memory + */ + FDTensor *GetOutputTensor(const std::string &name); + + /** \brief Clone new Runtime when multiple instances of the same model are + * created + * + * \param[in] stream CUDA Stream, defualt param is nullptr + * \return new Runtime* by this clone + */ + Runtime *Clone(void *stream = nullptr, int device_id = -1); + + void ReleaseModelMemoryBuffer(); + + RuntimeOption option; + + /** \brief Compile TorchScript Module, only for Poros backend + * + * \param[in] prewarm_tensors Prewarm datas for compile + * \return true if compile successed, otherwise false + */ + bool Compile(std::vector> &prewarm_tensors); + /** \brief Get profile time of Runtime after the profile process is done. + */ + double GetProfileTime() { + return backend_->benchmark_result_.time_of_runtime; + } + +private: + void CreateOrtBackend(); + void CreatePaddleBackend(); + void CreateTrtBackend(); + void CreateOpenVINOBackend(); + void CreateLiteBackend(); + void CreateRKNPU2Backend(); + void CreateHorizonBackend(); + void CreateSophgoNPUBackend(); + void CreatePorosBackend(); + void CreateTVMBackend(); + std::unique_ptr backend_; + std::vector input_tensors_; + std::vector output_tensors_; +}; +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/runtime_option.cc b/libs/ultrainfer/ultrainfer/runtime/runtime_option.cc new file mode 100755 index 0000000000..114fcfe002 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/runtime_option.cc @@ -0,0 +1,524 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
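A minimal end-to-end sketch of the Runtime API defined above may help; it is illustrative only. The model paths, the 1x3x224x224 shape, and the placeholder input data are hypothetical; FDDataType::FP32 is assumed to be the float32 enum from ultrainfer/core/fd_tensor.h, and TensorInfo is assumed to expose the input name as a `name` member. Note also that CreatePaddleBackend() above warns that the environment variable FLAGS_enable_pir_api should be set to '0'/'False' when the Paddle Inference backend is selected.

#include <vector>

#include "ultrainfer/runtime/runtime.h"

int main() {
  ultrainfer::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // hypothetical paths
  option.UseCpu();
  option.UseOrtBackend();  // optional; Init() auto-selects a backend otherwise

  ultrainfer::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }

  // Build one input tensor whose name matches the model's first input.
  ultrainfer::TensorInfo info = runtime.GetInputInfo(0);
  std::vector<float> data(1 * 3 * 224 * 224, 0.5f);  // placeholder input data
  ultrainfer::FDTensor x(info.name);
  x.SetExternalData({1, 3, 224, 224}, ultrainfer::FDDataType::FP32, data.data(),
                    ultrainfer::Device::CPU, 0);

  std::vector<ultrainfer::FDTensor> inputs, outputs;
  inputs.emplace_back(std::move(x));
  if (!runtime.Infer(inputs, &outputs)) {
    return -1;
  }
  return 0;
}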
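The no-argument Infer() overload above consumes tensors registered through BindInputTensor/BindOutputTensor and exposes results via GetOutputTensor, while Clone() shares the underlying engine only for OpenVINO and Paddle Inference, per the warning in Runtime::Clone. A sketch under the same assumptions as the previous example; the tensor names "x" and "y" are hypothetical.

#include "ultrainfer/runtime/runtime.h"

void InferByName(ultrainfer::Runtime &runtime, float *data) {
  ultrainfer::FDTensor x("x");  // "x" must match a real model input name
  x.SetExternalData({1, 3, 224, 224}, ultrainfer::FDDataType::FP32, data,
                    ultrainfer::Device::CPU, 0);
  runtime.BindInputTensor("x", x);  // zero-copy: shares the caller's buffer
  runtime.Infer();                  // runs on the bound tensors
  ultrainfer::FDTensor *y = runtime.GetOutputTensor("y");
  (void)y;  // read results from *y here

  // Clone() shares the engine for Backend::OPENVINO / Backend::PDINFER; other
  // backends get a freshly built engine, as Runtime::Clone warns.
  ultrainfer::Runtime *worker = runtime.Clone(nullptr, 0);
  delete worker;
}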
+ +#include "ultrainfer/runtime/runtime.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +void RuntimeOption::SetModelPath(const std::string &model_path, + const std::string ¶ms_path, + const ModelFormat &format) { + model_file = model_path; + params_file = params_path; + model_format = format; + model_from_memory_ = false; +} + +void RuntimeOption::SetModelBuffer(const std::string &model_buffer, + const std::string ¶ms_buffer, + const ModelFormat &format) { + model_file = model_buffer; + params_file = params_buffer; + model_format = format; + model_from_memory_ = true; +} + +void RuntimeOption::UseGpu(int gpu_id) { +#if defined(WITH_GPU) || defined(WITH_OPENCL) + device = Device::GPU; + device_id = gpu_id; + +#if defined(WITH_OPENCL) && defined(ENABLE_LITE_BACKEND) + paddle_lite_option.device = device; +#endif + +#else + FDWARNING << "The UltraInfer didn't compile with GPU, will force to use CPU." + << std::endl; + device = Device::CPU; +#endif +} + +void RuntimeOption::UseCpu() { device = Device::CPU; } + +void RuntimeOption::UseRKNPU2(ultrainfer::rknpu2::CpuName rknpu2_name, + ultrainfer::rknpu2::CoreMask rknpu2_core) { + rknpu2_option.cpu_name = rknpu2_name; + rknpu2_option.core_mask = rknpu2_core; + device = Device::RKNPU; +} + +void RuntimeOption::UseHorizon() { device = Device::SUNRISENPU; } + +void RuntimeOption::UseTimVX() { + device = Device::TIMVX; + paddle_lite_option.device = device; +} + +void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size, + bool locked, bool autotune, + const std::string &autotune_file, + const std::string &precision, + bool adaptive_seqlen, bool enable_multi_stream, + int64_t gm_default_size) { +#ifdef WITH_KUNLUNXIN + device = Device::KUNLUNXIN; + +#ifdef ENABLE_LITE_BACKEND + paddle_lite_option.device = device; + paddle_lite_option.device_id = kunlunxin_id; + paddle_lite_option.kunlunxin_l3_workspace_size = l3_workspace_size; + paddle_lite_option.kunlunxin_locked = locked; + paddle_lite_option.kunlunxin_autotune = autotune; + paddle_lite_option.kunlunxin_autotune_file = autotune_file; + paddle_lite_option.kunlunxin_precision = precision; + paddle_lite_option.kunlunxin_adaptive_seqlen = adaptive_seqlen; + paddle_lite_option.kunlunxin_enable_multi_stream = enable_multi_stream; + paddle_lite_option.kunlunxin_gm_default_size = gm_default_size; +#endif +#ifdef ENABLE_PADDLE_BACKEND + paddle_infer_option.device = device; + paddle_infer_option.xpu_option.kunlunxin_device_id = kunlunxin_id; + paddle_infer_option.xpu_option.kunlunxin_l3_workspace_size = + l3_workspace_size; + paddle_infer_option.xpu_option.kunlunxin_locked = locked; + paddle_infer_option.xpu_option.kunlunxin_autotune = autotune; + paddle_infer_option.xpu_option.kunlunxin_autotune_file = autotune_file; + paddle_infer_option.xpu_option.kunlunxin_precision = precision; + paddle_infer_option.xpu_option.kunlunxin_adaptive_seqlen = adaptive_seqlen; + paddle_infer_option.xpu_option.kunlunxin_enable_multi_stream = + enable_multi_stream; +// paddle_infer_option.xpu_option.kunlunxin_gm_default_size = gm_default_size; +// use paddle_infer_option.xpu_option.SetXpuConfig() for more options. +#endif + +#else + FDWARNING + << "The UltraInfer didn't compile with KUNLUNXIN, will force to use CPU." 
+ << std::endl; + device = Device::CPU; +#endif +} + +void RuntimeOption::UseIpu(int device_num, int micro_batch_size, + bool enable_pipelining, int batches_per_step) { +#ifdef WITH_IPU + device = Device::IPU; + paddle_infer_option.ipu_option.ipu_device_num = device_num; + paddle_infer_option.ipu_option.ipu_micro_batch_size = micro_batch_size; + paddle_infer_option.ipu_option.ipu_enable_pipelining = enable_pipelining; + paddle_infer_option.ipu_option.ipu_batches_per_step = batches_per_step; +// use paddle_infer_option.ipu_option.SetIpuConfig() for more options. +#else + FDWARNING << "The UltraInfer didn't compile with IPU, will force to use CPU." + << std::endl; + device = Device::CPU; +#endif +} + +void RuntimeOption::UseAscend() { + device = Device::ASCEND; + paddle_lite_option.device = device; +} + +void RuntimeOption::UseDirectML() { device = Device::DIRECTML; } + +void RuntimeOption::UseSophgo() { + device = Device::SOPHGOTPUD; + UseSophgoBackend(); +} + +void RuntimeOption::SetExternalStream(void *external_stream) { + external_stream_ = external_stream; +} + +void RuntimeOption::SetCpuThreadNum(int thread_num) { + FDASSERT(thread_num > 0, "The thread_num must be greater than 0."); + cpu_thread_num = thread_num; + paddle_lite_option.cpu_threads = thread_num; + ort_option.intra_op_num_threads = thread_num; + openvino_option.cpu_thread_num = thread_num; + paddle_infer_option.cpu_thread_num = thread_num; +} + +void RuntimeOption::SetOrtGraphOptLevel(int level) { + FDWARNING << "`RuntimeOption::SetOrtGraphOptLevel` will be removed in " + "v1.2.0, please modify its member variables directly, e.g " + "`runtime_option.ort_option.graph_optimization_level = 99`." + << std::endl; + std::vector supported_level{-1, 0, 1, 2}; + auto valid_level = std::find(supported_level.begin(), supported_level.end(), + level) != supported_level.end(); + FDASSERT(valid_level, "The level must be -1, 0, 1, 2."); + ort_option.graph_optimization_level = level; +} + +// use paddle inference backend +void RuntimeOption::UsePaddleBackend() { +#ifdef ENABLE_PADDLE_BACKEND + backend = Backend::PDINFER; +#else + FDASSERT(false, "The UltraInfer didn't compile with Paddle Inference."); +#endif +} + +// use onnxruntime backend +void RuntimeOption::UseOrtBackend() { +#ifdef ENABLE_ORT_BACKEND + backend = Backend::ORT; +#else + FDASSERT(false, "The UltraInfer didn't compile with OrtBackend."); +#endif +} + +// use sophgoruntime backend +void RuntimeOption::UseSophgoBackend() { +#ifdef ENABLE_SOPHGO_BACKEND + backend = Backend::SOPHGOTPU; +#else + FDASSERT(false, "The UltraInfer didn't compile with SophgoBackend."); +#endif +} + +// use poros backend +void RuntimeOption::UsePorosBackend() { +#ifdef ENABLE_POROS_BACKEND + backend = Backend::POROS; +#else + FDASSERT(false, "The UltraInfer didn't compile with PorosBackend."); +#endif +} + +void RuntimeOption::UseTrtBackend() { +#ifdef ENABLE_TRT_BACKEND + backend = Backend::TRT; +#else + FDASSERT(false, "The UltraInfer didn't compile with TrtBackend."); +#endif +} + +void RuntimeOption::UseOpenVINOBackend() { +#ifdef ENABLE_OPENVINO_BACKEND + backend = Backend::OPENVINO; +#else + FDASSERT(false, "The UltraInfer didn't compile with OpenVINO."); +#endif +} + +void RuntimeOption::UseLiteBackend() { +#ifdef ENABLE_LITE_BACKEND + backend = Backend::LITE; +#else + FDASSERT(false, "The UltraInfer didn't compile with Paddle Lite."); +#endif +} + +void RuntimeOption::UseHorizonNPUBackend() { +#ifdef ENABLE_HORIZON_BACKEND + backend = Backend::HORIZONNPU; +#else + FDASSERT(false, "The 
UltraInfer didn't compile with horizon"); +#endif +} + +void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) { + FDWARNING << "`RuntimeOption::SetPaddleMKLDNN` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`option.paddle_infer_option.enable_mkldnn = true`" + << std::endl; + paddle_infer_option.enable_mkldnn = pd_mkldnn; +} + +void RuntimeOption::DeletePaddleBackendPass(const std::string &pass_name) { + FDWARNING + << "`RuntimeOption::DeletePaddleBackendPass` will be removed in v1.2.0, " + "please use `option.paddle_infer_option.DeletePass` instead." + << std::endl; + paddle_infer_option.DeletePass(pass_name); +} +void RuntimeOption::EnablePaddleLogInfo() { + FDWARNING << "`RuntimeOption::EnablePaddleLogInfo` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`option.paddle_infer_option.enable_log_info = true`" + << std::endl; + paddle_infer_option.enable_log_info = true; +} + +void RuntimeOption::DisablePaddleLogInfo() { + FDWARNING << "`RuntimeOption::DisablePaddleLogInfo` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`option.paddle_infer_option.enable_log_info = false`" + << std::endl; + paddle_infer_option.enable_log_info = false; +} + +void RuntimeOption::EnablePaddleToTrt() { +#ifdef ENABLE_PADDLE_BACKEND + FDWARNING << "`RuntimeOption::EnablePaddleToTrt` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`option.paddle_infer_option.enable_trt = true`" + << std::endl; + FDINFO << "While using TrtBackend with EnablePaddleToTrt, UltraInfer will " + "change to use Paddle Inference Backend." + << std::endl; + backend = Backend::PDINFER; + paddle_infer_option.enable_trt = true; +#else + FDASSERT(false, "While using TrtBackend with EnablePaddleToTrt, require the " + "UltraInfer is compiled with Paddle Inference Backend, " + "please rebuild your UltraInfer."); +#endif +} + +void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) { + FDWARNING << "`RuntimeOption::SetPaddleMKLDNNCacheSize` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`option.paddle_infer_option.mkldnn_cache_size = size`." + << std::endl; + paddle_infer_option.mkldnn_cache_size = size; +} + +void RuntimeOption::SetOpenVINODevice(const std::string &name) { + FDWARNING << "`RuntimeOption::SetOpenVINODevice` will be removed in v1.2.0, " + "please use `RuntimeOption.openvino_option.SetDeivce(const " + "std::string&)` instead." + << std::endl; + openvino_option.SetDevice(name); +} + +void RuntimeOption::EnableLiteFP16() { + FDWARNING << "`RuntimeOption::EnableLiteFP16` will be removed in v1.2.0, " + "please modify its member variables directly, e.g " + "`runtime_option.paddle_lite_option.enable_fp16 = true`" + << std::endl; + paddle_lite_option.enable_fp16 = true; +} + +void RuntimeOption::DisableLiteFP16() { + FDWARNING << "`RuntimeOption::EnableLiteFP16` will be removed in v1.2.0, " + "please modify its member variables directly, e.g " + "`runtime_option.paddle_lite_option.enable_fp16 = false`" + << std::endl; + paddle_lite_option.enable_fp16 = false; +} + +void RuntimeOption::EnableLiteInt8() { + FDWARNING << "RuntimeOption::EnableLiteInt8 is a useless api, this calling " + "will not bring any effects, and will be removed in v1.2.0. if " + "you load a quantized model, it will automatically run with " + "int8 mode; otherwise it will run with float mode." 
+ << std::endl; +} + +void RuntimeOption::DisableLiteInt8() { + FDWARNING << "RuntimeOption::DisableLiteInt8 is a useless api, this calling " + "will not bring any effects, and will be removed in v1.2.0. if " + "you load a quantized model, it will automatically run with " + "int8 mode; otherwise it will run with float mode." + << std::endl; +} + +void RuntimeOption::SetLitePowerMode(LitePowerMode mode) { + FDWARNING << "`RuntimeOption::SetLitePowerMode` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.power_mode = 3;`" + << std::endl; + paddle_lite_option.power_mode = mode; +} + +void RuntimeOption::SetLiteOptimizedModelDir( + const std::string &optimized_model_dir) { + FDWARNING + << "`RuntimeOption::SetLiteOptimizedModelDir` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.optimized_model_dir = \"...\"`" + << std::endl; + paddle_lite_option.optimized_model_dir = optimized_model_dir; +} + +void RuntimeOption::SetLiteSubgraphPartitionPath( + const std::string &nnadapter_subgraph_partition_config_path) { + FDWARNING << "`RuntimeOption::SetLiteSubgraphPartitionPath` will be removed " + "in v1.2.0, please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.nnadapter_subgraph_" + "partition_config_path = \"...\";` " + << std::endl; + paddle_lite_option.nnadapter_subgraph_partition_config_path = + nnadapter_subgraph_partition_config_path; +} + +void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer( + const std::string &nnadapter_subgraph_partition_config_buffer) { + FDWARNING + << "`RuntimeOption::SetLiteSubgraphPartitionConfigBuffer` will be " + "removed in v1.2.0, please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.nnadapter_subgraph_partition_" + "config_buffer = ...`" + << std::endl; + paddle_lite_option.nnadapter_subgraph_partition_config_buffer = + nnadapter_subgraph_partition_config_buffer; +} + +void RuntimeOption::SetLiteContextProperties( + const std::string &nnadapter_context_properties) { + FDWARNING << "`RuntimeOption::SetLiteContextProperties` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.nnadapter_context_" + "properties = ...`" + << std::endl; + paddle_lite_option.nnadapter_context_properties = + nnadapter_context_properties; +} + +void RuntimeOption::SetLiteModelCacheDir( + const std::string &nnadapter_model_cache_dir) { + FDWARNING + << "`RuntimeOption::SetLiteModelCacheDir` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.nnadapter_model_cache_dir = ...`" + << std::endl; + paddle_lite_option.nnadapter_model_cache_dir = nnadapter_model_cache_dir; +} + +void RuntimeOption::SetLiteDynamicShapeInfo( + const std::map>> + &nnadapter_dynamic_shape_info) { + FDWARNING << "`RuntimeOption::SetLiteDynamicShapeInfo` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.paddle_lite_option." 
+ "nnadapter_dynamic_shape_info = ...`" + << std::endl; + paddle_lite_option.nnadapter_dynamic_shape_info = + nnadapter_dynamic_shape_info; +} + +void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath( + const std::string &nnadapter_mixed_precision_quantization_config_path) { + FDWARNING + << "`RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath` will be " + "removed in v1.2.0, please modify its member variable directly, e.g " + "`runtime_option.paddle_lite_option.paddle_lite_option.nnadapter_" + "mixed_precision_quantization_config_path = ...`" + << std::endl; + paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = + nnadapter_mixed_precision_quantization_config_path; +} + +void RuntimeOption::SetTrtInputShape(const std::string &input_name, + const std::vector &min_shape, + const std::vector &opt_shape, + const std::vector &max_shape) { + FDWARNING << "`RuntimeOption::SetTrtInputShape` will be removed in v1.2.0, " + "please use `RuntimeOption.trt_option.SetShape()` instead." + << std::endl; + trt_option.SetShape(input_name, min_shape, opt_shape, max_shape); +} + +void RuntimeOption::SetTrtInputData(const std::string &input_name, + const std::vector &min_shape_data, + const std::vector &opt_shape_data, + const std::vector &max_shape_data) { + FDWARNING << "`RuntimeOption::SetTrtInputData` will be removed in v1.2.0, " + "please use `RuntimeOption.trt_option.SetInputData()` instead." + << std::endl; + trt_option.SetInputData(input_name, min_shape_data, opt_shape_data, + max_shape_data); +} + +void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) { + FDWARNING << "`RuntimeOption::SetTrtMaxWorkspaceSize` will be removed in " + "v1.2.0, please modify its member variable directly, e.g " + "`RuntimeOption.trt_option.max_workspace_size = " + << max_workspace_size << "`." << std::endl; + trt_option.max_workspace_size = max_workspace_size; +} +void RuntimeOption::SetTrtMaxBatchSize(size_t max_batch_size) { + FDWARNING << "`RuntimeOption::SetTrtMaxBatchSize` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`RuntimeOption.trt_option.max_batch_size = " + << max_batch_size << "`." << std::endl; + trt_option.max_batch_size = max_batch_size; +} + +void RuntimeOption::EnableTrtFP16() { + FDWARNING << "`RuntimeOption::EnableTrtFP16` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.trt_option.enable_fp16 = true;`" + << std::endl; + trt_option.enable_fp16 = true; +} + +void RuntimeOption::DisableTrtFP16() { + FDWARNING << "`RuntimeOption::DisableTrtFP16` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.trt_option.enable_fp16 = false;`" + << std::endl; + trt_option.enable_fp16 = false; +} + +void RuntimeOption::EnablePinnedMemory() { enable_pinned_memory = true; } + +void RuntimeOption::DisablePinnedMemory() { enable_pinned_memory = false; } + +void RuntimeOption::SetTrtCacheFile(const std::string &cache_file_path) { + FDWARNING << "`RuntimeOption::SetTrtCacheFile` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.trt_option.serialize_file = \"" + << cache_file_path << "\"." 
<< std::endl; + trt_option.serialize_file = cache_file_path; +} + +void RuntimeOption::SetOpenVINOStreams(int num_streams) { + FDWARNING << "`RuntimeOption::SetOpenVINOStreams` will be removed in v1.2.0, " + "please modify its member variable directly, e.g " + "`runtime_option.openvino_option.num_streams = " + << num_streams << "`." << std::endl; + openvino_option.num_streams = num_streams; +} + +void RuntimeOption::EnablePaddleTrtCollectShape() { + FDWARNING << "`RuntimeOption::EnablePaddleTrtCollectShape` will be removed " + "in v1.2.0, please modify its member variable directly, e.g " + "runtime_option.paddle_infer_option.collect_trt_shape = true`." + << std::endl; + paddle_infer_option.collect_trt_shape = true; +} + +void RuntimeOption::DisablePaddleTrtCollectShape() { + FDWARNING << "`RuntimeOption::DisablePaddleTrtCollectShape` will be removed " + "in v1.2.0, please modify its member variable directly, e.g " + "runtime_option.paddle_infer_option.collect_trt_shape = false`." + << std::endl; + paddle_infer_option.collect_trt_shape = false; +} + +void RuntimeOption::DisablePaddleTrtOPs(const std::vector &ops) { + FDWARNING << "`RuntimeOption::DisablePaddleTrtOps` will be removed in " + "v.1.20, please use " + "`runtime_option.paddle_infer_option.DisableTrtOps` instead." + << std::endl; + paddle_infer_option.DisableTrtOps(ops); +} + +void RuntimeOption::UseTVMBackend() { +#ifdef ENABLE_TVM_BACKEND + backend = Backend::TVM; +#else + FDASSERT(false, "The UltraInfer didn't compile with TVMBackend."); +#endif +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/runtime/runtime_option.h b/libs/ultrainfer/ultrainfer/runtime/runtime_option.h new file mode 100755 index 0000000000..8b372a28d2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/runtime/runtime_option.h @@ -0,0 +1,282 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file runtime_option.h + \brief A brief file description. + More details + */ + +#pragma once + +#include "ultrainfer/benchmark/option.h" +#include "ultrainfer/runtime/backends/lite/option.h" +#include "ultrainfer/runtime/backends/openvino/option.h" +#include "ultrainfer/runtime/backends/ort/option.h" +#include "ultrainfer/runtime/backends/paddle/option.h" +#include "ultrainfer/runtime/backends/poros/option.h" +#include "ultrainfer/runtime/backends/rknpu2/option.h" +#include "ultrainfer/runtime/backends/sophgo/option.h" +#include "ultrainfer/runtime/backends/tensorrt/option.h" +#include "ultrainfer/runtime/backends/tvm/option.h" +#include "ultrainfer/runtime/enum_variables.h" +#include +#include +#include + +namespace ultrainfer { + +/*! 
@brief Option object used when create a new Runtime object + */ +struct ULTRAINFER_DECL RuntimeOption { + /** \brief Set path of model file and parameter file + * + * \param[in] model_path Path of model file, e.g ResNet50/model.pdmodel for + * Paddle format model / ResNet50/model.onnx for ONNX format model \param[in] + * params_path Path of parameter file, this only used when the model format is + * Paddle, e.g Resnet50/model.pdiparams \param[in] format Format of the loaded + * model + */ + void SetModelPath(const std::string &model_path, + const std::string ¶ms_path = "", + const ModelFormat &format = ModelFormat::PADDLE); + + /** \brief Specify the memory buffer of model and parameter. Used when model + * and params are loaded directly from memory + * + * \param[in] model_buffer The string of model memory buffer + * \param[in] params_buffer The string of parameters memory buffer + * \param[in] format Format of the loaded model + */ + void SetModelBuffer(const std::string &model_buffer, + const std::string ¶ms_buffer = "", + const ModelFormat &format = ModelFormat::PADDLE); + + /// Use cpu to inference, the runtime will inference on CPU by default + void UseCpu(); + /// Use Nvidia GPU to inference + void UseGpu(int gpu_id = 0); + /// Use RKNPU2 e.g RK3588/RK356X to inference + void UseRKNPU2(ultrainfer::rknpu2::CpuName rknpu2_name = + ultrainfer::rknpu2::CpuName::RK356X, + ultrainfer::rknpu2::CoreMask rknpu2_core = + ultrainfer::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO); + // Use Horizon NPU to inference + void UseHorizon(); + /// Use TimVX e.g RV1126/A311D to inference + void UseTimVX(); + /// Use Huawei Ascend to inference + void UseAscend(); + + /// Use onnxruntime DirectML to inference + void UseDirectML(); + + /// Use Sophgo to inference + void UseSophgo(); + /// \brief Turn on KunlunXin XPU. + /// + /// \param kunlunxin_id the KunlunXin XPU card to use (default is 0). + /// \param l3_workspace_size The size of the video memory allocated by the l3 + /// cache, the maximum is 16M. + /// \param locked Whether the allocated L3 cache can be locked. If false, + /// it means that the L3 cache is not locked, and the allocated L3 + /// cache can be shared by multiple models, and multiple models + /// sharing the L3 cache will be executed sequentially on the card. + /// \param autotune Whether to autotune the conv operator in the model. If + /// true, when the conv operator of a certain dimension is executed + /// for the first time, it will automatically search for a better + /// algorithm to improve the performance of subsequent conv operators + /// of the same dimension. + /// \param autotune_file Specify the path of the autotune file. If + /// autotune_file is specified, the algorithm specified in the + /// file will be used and autotune will not be performed again. + /// \param precision Calculation accuracy of multi_encoder + /// \param adaptive_seqlen Is the input of multi_encoder variable length + /// \param enable_multi_stream Whether to enable the multi stream of + /// KunlunXin XPU. + /// \param gm_default_size The default size of global memory of KunlunXin XPU. 
+ /// + void UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00, + bool locked = false, bool autotune = true, + const std::string &autotune_file = "", + const std::string &precision = "int16", + bool adaptive_seqlen = false, + bool enable_multi_stream = false, + int64_t gm_default_size = 0); + + void SetExternalStream(void *external_stream); + + /* + * @brief Set number of cpu threads while inference on CPU, by default it will + * decided by the different backends + */ + void SetCpuThreadNum(int thread_num); + /// Set Paddle Inference as inference backend, support CPU/GPU + void UsePaddleInferBackend() { return UsePaddleBackend(); } + /// Set ONNX Runtime as inference backend, support CPU/GPU + void UseOrtBackend(); + /// Set SOPHGO Runtime as inference backend, support SOPHGO + void UseSophgoBackend(); + /// Set TensorRT as inference backend, only support GPU + void UseTrtBackend(); + /// Set Poros backend as inference backend, support CPU/GPU + void UsePorosBackend(); + /// Set OpenVINO as inference backend, only support CPU + void UseOpenVINOBackend(); + /// Set Paddle Lite as inference backend, only support arm cpu + void UsePaddleLiteBackend() { return UseLiteBackend(); } + /** \Use Graphcore IPU to inference. + * + * \param[in] device_num the number of IPUs. + * \param[in] micro_batch_size the batch size in the graph, only work when + * graph has no batch shape info. \param[in] enable_pipelining enable + * pipelining. \param[in] batches_per_step the number of batches per run in + * pipelining. + */ + void UseIpu(int device_num = 1, int micro_batch_size = 1, + bool enable_pipelining = false, int batches_per_step = 1); + + /// Option to configure ONNX Runtime backend + OrtBackendOption ort_option; + /// Option to configure TensorRT backend + TrtBackendOption trt_option; + /// Option to configure Paddle Inference backend + PaddleBackendOption paddle_infer_option; + /// Option to configure Poros backend + PorosBackendOption poros_option; + /// Option to configure OpenVINO backend + OpenVINOBackendOption openvino_option; + /// Option to configure Paddle Lite backend + LiteBackendOption paddle_lite_option; + /// Option to configure RKNPU2 backend + RKNPU2BackendOption rknpu2_option; + /// Option to configure TVM backend + TVMBackendOption tvm_option; + + // \brief Set the profile mode as 'true'. + // + // \param[in] inclue_h2d_d2h Whether to + // include time of H2D_D2H for time of runtime. + // \param[in] repeat Repeat times for runtime inference. + // \param[in] warmup Warmup times for runtime inference. + // + void EnableProfiling(bool inclue_h2d_d2h = false, int repeat = 100, + int warmup = 50) { + benchmark_option.enable_profile = true; + benchmark_option.warmup = warmup; + benchmark_option.repeats = repeat; + benchmark_option.include_h2d_d2h = inclue_h2d_d2h; + } + + // \brief Set the profile mode as 'false'. + // + void DisableProfiling() { benchmark_option.enable_profile = false; } + + // \brief Enable to check if current backend set by + // user can be found at valid_xxx_backend. + // + void EnableValidBackendCheck() { enable_valid_backend_check = true; } + // \brief Disable to check if current backend set by + // user can be found at valid_xxx_backend. + // + void DisableValidBackendCheck() { enable_valid_backend_check = false; } + + // Benchmark option + benchmark::BenchmarkOption benchmark_option; + // enable the check for valid backend, default true. 
+ bool enable_valid_backend_check = true; + + // If model_from_memory is true, the model_file and params_file is + // binary stream in memory; + // Otherwise, the model_file and params_file means the path of file + std::string model_file = ""; + std::string params_file = ""; + bool model_from_memory_ = false; + // format of input model + ModelFormat model_format = ModelFormat::PADDLE; + + // for cpu inference + // default will let the backend choose their own default value + int cpu_thread_num = -1; + int device_id = 0; + Backend backend = Backend::UNKNOWN; + + Device device = Device::CPU; + + void *external_stream_ = nullptr; + + bool enable_pinned_memory = false; + + // *** The belowing api are deprecated, will be removed in v1.2.0 + // *** Do not use it anymore + void SetPaddleMKLDNN(bool pd_mkldnn = true); + void EnablePaddleToTrt(); + void DeletePaddleBackendPass(const std::string &delete_pass_name); + void EnablePaddleLogInfo(); + void DisablePaddleLogInfo(); + void SetPaddleMKLDNNCacheSize(int size); + void SetOpenVINODevice(const std::string &name = "CPU"); + void SetOpenVINOShapeInfo( + const std::map> &shape_info) { + openvino_option.shape_infos = shape_info; + } + void SetOpenVINOCpuOperators(const std::vector &operators) { + openvino_option.SetCpuOperators(operators); + } + void SetLiteOptimizedModelDir(const std::string &optimized_model_dir); + void SetLiteSubgraphPartitionPath( + const std::string &nnadapter_subgraph_partition_config_path); + void SetLiteSubgraphPartitionConfigBuffer( + const std::string &nnadapter_subgraph_partition_config_buffer); + void + SetLiteContextProperties(const std::string &nnadapter_context_properties); + void SetLiteModelCacheDir(const std::string &nnadapter_model_cache_dir); + void SetLiteDynamicShapeInfo( + const std::map>> + &nnadapter_dynamic_shape_info); + void SetLiteMixedPrecisionQuantizationConfigPath( + const std::string &nnadapter_mixed_precision_quantization_config_path); + void EnableLiteFP16(); + void DisableLiteFP16(); + void EnableLiteInt8(); + void DisableLiteInt8(); + void SetLitePowerMode(LitePowerMode mode); + void SetTrtInputShape( + const std::string &input_name, const std::vector &min_shape, + const std::vector &opt_shape = std::vector(), + const std::vector &max_shape = std::vector()); + + void SetTrtInputData( + const std::string &input_name, const std::vector &min_shape_data, + const std::vector &opt_shape_data = std::vector(), + const std::vector &max_shape_data = std::vector()); + + void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size); + void SetTrtMaxBatchSize(size_t max_batch_size); + void EnableTrtFP16(); + void DisableTrtFP16(); + void SetTrtCacheFile(const std::string &cache_file_path); + void EnablePinnedMemory(); + void DisablePinnedMemory(); + void EnablePaddleTrtCollectShape(); + void DisablePaddleTrtCollectShape(); + void DisablePaddleTrtOPs(const std::vector &ops); + void SetOpenVINOStreams(int num_streams); + void SetOrtGraphOptLevel(int level = -1); + void UsePaddleBackend(); + void UseLiteBackend(); + void UseHorizonNPUBackend(); + void UseTVMBackend(); +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text.h b/libs/ultrainfer/ultrainfer/text.h new file mode 100755 index 0000000000..d13d05dd63 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text.h @@ -0,0 +1,19 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
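Most of the Set*/Enable* helpers implemented in runtime_option.cc above only forward to the per-backend option structs and warn that they will be removed in v1.2.0, so new code should write those members directly. A configuration sketch follows; the model paths, the input name "image", the shapes, and the cache path are placeholders.

#include "ultrainfer/runtime/runtime_option.h"

ultrainfer::RuntimeOption MakeGpuTrtOption() {
  ultrainfer::RuntimeOption opt;
  opt.SetModelPath("model.pdmodel", "model.pdiparams");  // hypothetical paths
  opt.UseGpu(0);
  opt.UseTrtBackend();
  // Preferred style: configure the backend option struct directly instead of
  // the deprecated wrappers (SetTrtInputShape, EnableTrtFP16, SetTrtCacheFile).
  opt.trt_option.enable_fp16 = true;
  opt.trt_option.SetShape("image", {1, 3, 224, 224}, {1, 3, 224, 224},
                          {4, 3, 224, 224});
  opt.trt_option.serialize_file = "trt_cache/engine.trt";  // hypothetical path
  return opt;
}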
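EnableProfiling() above only fills benchmark_option; the measured time is read back through Runtime::GetProfileTime() declared in runtime.h. A short sketch reusing the hypothetical MakeGpuTrtOption() helper from the previous example; the repeat and warmup counts are arbitrary illustrative values.

#include "ultrainfer/runtime/runtime.h"

double ProfileOnce() {
  ultrainfer::RuntimeOption opt = MakeGpuTrtOption();
  opt.EnableProfiling(false, /*repeat=*/50, /*warmup=*/10);

  ultrainfer::Runtime runtime;
  if (!runtime.Init(opt)) {
    return -1.0;
  }
  // ... call runtime.Infer(inputs, &outputs) here as in the earlier sketch ...
  return runtime.GetProfileTime();  // profile time recorded by the backend
}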
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "ultrainfer/core/config.h" +#ifdef ENABLE_TEXT +#include "ultrainfer/text/uie/model.h" +#endif diff --git a/libs/ultrainfer/ultrainfer/text/common/option.h b/libs/ultrainfer/ultrainfer/text/common/option.h new file mode 100755 index 0000000000..e00ff7178b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/common/option.h @@ -0,0 +1,26 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace text { + +struct ULTRAINFER_DECL TextPreprocessOption {}; +struct ULTRAINFER_DECL TextPostprocessOption {}; +struct ULTRAINFER_DECL PredictionOption {}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/common/result.cc b/libs/ultrainfer/ultrainfer/text/common/result.cc new file mode 100755 index 0000000000..2d79b4dd16 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/common/result.cc @@ -0,0 +1,18 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/text/common/result.h" + +namespace ultrainfer { +namespace text {} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/common/result.h b/libs/ultrainfer/ultrainfer/text/common/result.h new file mode 100755 index 0000000000..7002083d15 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/common/result.h @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace text { + +struct ULTRAINFER_DECL Result {}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.cc b/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.cc new file mode 100755 index 0000000000..d8c2349a2c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.cc @@ -0,0 +1,31 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/text/postprocessor/postprocessor.h" + +namespace ultrainfer { +namespace text { + +bool Postprocessor::Decode(const std::vector &model_result, + Result *decoded_result) const { + return true; +} + +bool Postprocessor::DecodeBatch(const std::vector &model_result, + Result *decoded_result) const { + return true; +} + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.h b/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.h new file mode 100755 index 0000000000..fb969caa62 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/postprocessor/postprocessor.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/text/common/result.h" +#include "ultrainfer/utils/utils.h" +#include + +namespace ultrainfer { +namespace text { + +class Postprocessor { +public: + virtual bool Decode(const std::vector &model_result, + Result *decoded_result) const; + virtual bool DecodeBatch(const std::vector &model_result, + Result *decoded_result) const; +}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.cc b/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.cc new file mode 100755 index 0000000000..01848a4277 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.cc @@ -0,0 +1,32 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/text/preprocessor/preprocessor.h" + +namespace ultrainfer { +namespace text { + +bool Preprocessor::Encode(const std::string &raw_text, + std::vector *encoded_tensor) const { + return true; +} + +bool Preprocessor::EncodeBatch(const std::vector &raw_texts, + std::vector *encoded_tensor) const { + return true; +} + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.h b/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.h new file mode 100755 index 0000000000..28ccd53272 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/preprocessor/preprocessor.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include +#include + +namespace ultrainfer { +namespace text { + +class Preprocessor { +public: + virtual bool Encode(const std::string &raw_text, + std::vector *encoded_tensor) const; + virtual bool EncodeBatch(const std::vector &raw_texts, + std::vector *encoded_tensor) const; +}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/text_model.cc b/libs/ultrainfer/ultrainfer/text/text_model.cc new file mode 100755 index 0000000000..efa6c7517c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/text_model.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
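The text::Preprocessor and text::Postprocessor classes declared above are thin virtual interfaces whose default Encode/Decode implementations simply return true. A derivation sketch follows; the class names are hypothetical, and the std::vector element type (FDTensor) is inferred from the fd_tensor.h include because the template arguments were stripped in this copy of the patch.

#include <string>
#include <vector>

#include "ultrainfer/text/postprocessor/postprocessor.h"
#include "ultrainfer/text/preprocessor/preprocessor.h"

namespace ultrainfer {
namespace text {

// Hypothetical concrete preprocessor: turn raw text into input tensors.
class MyPreprocessor : public Preprocessor {
public:
  bool Encode(const std::string &raw_text,
              std::vector<FDTensor> *encoded_tensor) const override {
    // Tokenize raw_text and fill *encoded_tensor here.
    return true;
  }
};

// Hypothetical concrete postprocessor: turn output tensors into a Result.
class MyPostprocessor : public Postprocessor {
public:
  bool Decode(const std::vector<FDTensor> &model_result,
              Result *decoded_result) const override {
    // Convert model_result into *decoded_result here.
    return true;
  }
};

} // namespace text
} // namespace ultrainfer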
+
+#include "ultrainfer/text/text_model.h"
+#include "ultrainfer/text/common/option.h"
+#include "ultrainfer/text/common/result.h"
+#include "ultrainfer/text/postprocessor/postprocessor.h"
+#include "ultrainfer/text/preprocessor/preprocessor.h"
+
+namespace ultrainfer {
+namespace text {
+
+bool TextModel::Predict(const std::string &raw_text, Result *result,
+ const PredictionOption &option) {
+ // Preprocess
+ std::vector<FDTensor> input_tensor;
+ std::vector<FDTensor> output_tensor;
+ if (!preprocessor_->Encode(raw_text, &input_tensor)) {
+ FDERROR << "Failed to preprocess input data while using model:"
+ << ModelName() << "." << std::endl;
+ return false;
+ }
+
+ // Inference Runtime
+ if (!Infer(input_tensor, &output_tensor)) {
+ FDERROR << "Failed to run inference while using model:" << ModelName() << "."
+ << std::endl;
+ return false;
+ }
+
+ // Postprocess
+ if (!postprocessor_->Decode(output_tensor, result)) {
+ FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
+ << std::endl;
+ return false;
+ }
+ return true;
+}
+
+bool TextModel::PredictBatch(const std::vector<std::string> &raw_text_array,
+ Result *results, const PredictionOption &option) {
+ // Preprocess
+ std::vector<FDTensor> input_tensor;
+ std::vector<FDTensor> output_tensor;
+ if (!preprocessor_->EncodeBatch(raw_text_array, &input_tensor)) {
+ FDERROR << "Failed to preprocess input data while using model:"
+ << ModelName() << "." << std::endl;
+ return false;
+ }
+
+ // Inference Runtime
+ if (!Infer(input_tensor, &output_tensor)) {
+ FDERROR << "Failed to run inference while using model:" << ModelName() << "."
+ << std::endl;
+ return false;
+ }
+
+ // Postprocess
+ if (!postprocessor_->DecodeBatch(output_tensor, results)) {
+ FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
+ << std::endl;
+ return false;
+ }
+ return true;
+}
+
+} // namespace text
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/text/text_model.h b/libs/ultrainfer/ultrainfer/text/text_model.h
new file mode 100755
index 0000000000..63f8db5a0f
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/text/text_model.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
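TextModel::Predict above runs a fixed preprocess, Infer, postprocess pipeline, with the processors injected through the SetPreprocessor/SetPostprocessor templates that the header below declares (their template parameter lists were stripped in this copy of the patch; the first parameter is assumed to be the concrete processor type). A wiring sketch follows, reusing the hypothetical MyPreprocessor/MyPostprocessor from the earlier sketch; the input string is a placeholder, and the runtime and model-file setup that a real subclass performs (as UIEModel does further below) is omitted.

#include <string>

#include "ultrainfer/text/common/option.h"
#include "ultrainfer/text/common/result.h"
#include "ultrainfer/text/text_model.h"

int RunTextModel() {
  ultrainfer::text::TextModel model;
  model.SetPreprocessor<ultrainfer::text::MyPreprocessor>();
  model.SetPostprocessor<ultrainfer::text::MyPostprocessor>();

  ultrainfer::text::Result result;
  ultrainfer::text::PredictionOption option;
  if (!model.Predict("some raw input text", &result, option)) {  // placeholder
    return -1;
  }
  return 0;
}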
+ +#pragma once +#include + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { +namespace text { + +class Preprocessor; +class Postprocessor; +class Result; +class PredictionOption; + +class ULTRAINFER_DECL TextModel : public UltraInferModel { +public: + virtual std::string ModelName() const { return "TextModel"; } + virtual bool Predict(const std::string &raw_text, Result *result, + const PredictionOption &option); + virtual bool PredictBatch(const std::vector &raw_text_array, + Result *result, const PredictionOption &option); + template void SetPreprocessor(Args &&...args) { + preprocessor_ = utils::make_unique(std::forward(args)...); + } + template + void SetPostprocessor(Args &&...args) { + postprocessor_ = utils::make_unique(std::forward(args)...); + } + +private: + std::unique_ptr preprocessor_; + std::unique_ptr postprocessor_; +}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/text_pybind.cc b/libs/ultrainfer/ultrainfer/text/text_pybind.cc new file mode 100755 index 0000000000..8dbe39fd57 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/text_pybind.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace py = pybind11; +using namespace py::literals; + +namespace ultrainfer { + +void BindUIE(py::module &m); + +py::dict ConvertUIEResultToDict(const text::UIEResult &self) { + py::dict d; + d["start"] = self.start_; + d["end"] = self.end_; + d["probability"] = self.probability_; + d["text"] = self.text_; + + if (!self.relation_.empty()) { + d["relation"] = py::dict(); + for (auto iter = self.relation_.begin(); iter != self.relation_.end(); + ++iter) { + py::list l; + for (auto result_iter = iter->second.begin(); + result_iter != iter->second.end(); ++result_iter) { + l.append(ConvertUIEResultToDict(*result_iter)); + } + d["relation"][iter->first.c_str()] = l; + } + } + return d; +} + +void BindText(py::module &m) { + py::class_(m, "UIEResult", py::dynamic_attr()) + .def(py::init()) + .def_readwrite("start", &text::UIEResult::start_) + .def_readwrite("end", &text::UIEResult::end_) + .def_readwrite("probability", &text::UIEResult::probability_) + .def_readwrite("text", &text::UIEResult::text_) + .def_readwrite("relation", &text::UIEResult::relation_) + .def("get_dict", + [](const text::UIEResult &self) { + return ConvertUIEResultToDict(self); + }) + .def("__repr__", &text::UIEResult::Str) + .def("__str__", &text::UIEResult::Str); + BindUIE(m); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/uie/model.cc b/libs/ultrainfer/ultrainfer/text/uie/model.cc new file mode 100755 index 0000000000..82e46fefb1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/uie/model.cc @@ -0,0 +1,797 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/text/uie/model.h" + +#include +#include +#include +#include +#include + +#include "fast_tokenizer/pretokenizers/pretokenizer.h" +#include "fast_tokenizer/utils/utf8.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/function/split.h" + +namespace ultrainfer { +namespace text { + +static std::string DBC2SBC(const std::string &content) { + std::string result; + size_t content_utf8_len = 0; + while (content_utf8_len < content.length()) { + uint32_t content_char; + auto content_char_width = fast_tokenizer::utils::UTF8ToUInt32( + content.data() + content_utf8_len, &content_char); + content_char = fast_tokenizer::utils::UTF8ToUnicode(content_char); + if (content_char == 0x3000) { + content_char = 0x0020; + } else { + content_char -= 0xfee0; + } + if (!(content_char >= 0x0021 && content_char <= 0x7e)) { + result.append(content.data() + content_utf8_len, content_char_width); + } else { + char dst_char[5] = {0}; + uint32_t utf8_uint32 = fast_tokenizer::utils::UnicodeToUTF8(content_char); + uint32_t utf8_char_count = + fast_tokenizer::utils::UnicodeToUTF8Char(utf8_uint32, dst_char); + result.append(dst_char, utf8_char_count); + } + content_utf8_len += content_char_width; + } + return result; +} + +static std::ostream &PrintResult(std::ostream &os, const UIEResult &result, + int tab_size) { + constexpr int TAB_OFFSET = 4; + // Print text + for (int i = 0; i < tab_size; ++i) { + os << " "; + } + os << "text: " << result.text_ << "\n"; + + // Print probability + for (int i = 0; i < tab_size; ++i) { + os << " "; + } + os << "probability: " << result.probability_ << "\n"; + + if (result.start_ != 0 || result.end_ != 0) { + // Print start + for (int i = 0; i < tab_size; ++i) { + os << " "; + } + os << "start: " << result.start_ << "\n"; + + // Print end + for (int i = 0; i < tab_size; ++i) { + os << " "; + } + os << "end: " << result.end_ << "\n"; + } + + // Print relation + if (result.relation_.size() > 0) { + for (int i = 0; i < tab_size; ++i) { + os << " "; + } + os << "relation:\n"; + for (auto &&curr_relation : result.relation_) { + for (int i = 0; i < tab_size + TAB_OFFSET; ++i) { + os << " "; + } + os << curr_relation.first << ":\n"; + for (int i = 0; i < curr_relation.second.size(); ++i) { + PrintResult(os, curr_relation.second[i], + tab_size + TAB_OFFSET + TAB_OFFSET); + } + } + } + os << "\n"; + return os; +} + +std::ostream &operator<<(std::ostream &os, const UIEResult &result) { + return PrintResult(os, result, 0); +} + +std::ostream &operator<<( + std::ostream &os, + const std::vector>> + &results) { + os << "The result:\n"; + for (int i = 0; i < results.size(); ++i) { + for (auto &&curr_result : results[i]) { + os << curr_result.first << ": \n"; + for (auto &&uie_result : curr_result.second) { + PrintResult(os, uie_result, 4); + } + } + os << std::endl; + } + return os; +} + +std::string UIEResult::Str() const { + std::ostringstream oss; + oss << *this; + return oss.str(); +} + +void 
Schema::CreateRoot(const std::string &name) { + root_ = ultrainfer::utils::make_unique(name); +} + +Schema::Schema(const std::string &schema, const std::string &name) { + CreateRoot(name); + root_->AddChild(schema); +} + +Schema::Schema(const std::vector &schema_list, + const std::string &name) { + CreateRoot(name); + for (const auto &schema : schema_list) { + root_->AddChild(schema); + } +} + +Schema::Schema(const std::vector &schema_list, + const std::string &name) { + CreateRoot(name); + for (const auto &schema : schema_list) { + root_->AddChild(schema); + } +} + +Schema::Schema(const SchemaNode &schema, const std::string &name) { + CreateRoot(name); + root_->AddChild(schema); +} + +UIEModel::UIEModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const std::vector &schema, + int batch_size, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format, + SchemaLanguage schema_language) + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), + tokenizer_(vocab_file) { + runtime_option = custom_option; + runtime_option.SetModelPath(model_file, params_file, model_format); + initialized = Initialize(); + SetSchema(schema); + tokenizer_.EnableTruncMethod( + max_length, 0, fast_tokenizer::core::Direction::RIGHT, + fast_tokenizer::core::TruncStrategy::LONGEST_FIRST); +} + +UIEModel::UIEModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const std::vector &schema, + int batch_size, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format, + SchemaLanguage schema_language) + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), + tokenizer_(vocab_file) { + runtime_option = custom_option; + runtime_option.SetModelPath(model_file, params_file, model_format); + initialized = Initialize(); + SetSchema(schema); + tokenizer_.EnableTruncMethod( + max_length, 0, fast_tokenizer::core::Direction::RIGHT, + fast_tokenizer::core::TruncStrategy::LONGEST_FIRST); +} + +UIEModel::UIEModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const SchemaNode &schema, int batch_size, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format, + SchemaLanguage schema_language) + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), + tokenizer_(vocab_file) { + runtime_option = custom_option; + runtime_option.SetModelPath(model_file, params_file, model_format); + initialized = Initialize(); + SetSchema(schema); + tokenizer_.EnableTruncMethod( + max_length, 0, fast_tokenizer::core::Direction::RIGHT, + fast_tokenizer::core::TruncStrategy::LONGEST_FIRST); +} + +bool UIEModel::Initialize() { + SetValidBackend(); + return InitRuntime(); +} + +void UIEModel::SetValidBackend() { + // TODO(zhoushunjie): Add lite backend in future + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; +} + +void UIEModel::SetSchema(const std::vector &schema) { + schema_ = ultrainfer::utils::make_unique(schema); +} + +void UIEModel::SetSchema(const std::vector &schema) { + 
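+  // Rebuild the schema tree from the given nodes, replacing any schema set earlier.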
schema_ = ultrainfer::utils::make_unique(schema); +} + +void UIEModel::SetSchema(const SchemaNode &schema) { + schema_ = ultrainfer::utils::make_unique(schema); +} + +void UIEModel::AutoSplitter(const std::vector &texts, + size_t max_length, + std::vector *short_texts, + std::vector> *input_mapping) { + size_t cnt_org = 0; + size_t cnt_short = 0; + for (auto &text : texts) { + auto text_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8(text.c_str(), + text.length()); + if (text_len <= max_length) { + short_texts->push_back(text); + if (input_mapping->size() <= cnt_org) { + input_mapping->push_back({cnt_short}); + } else { + (*input_mapping)[cnt_org].push_back(cnt_short); + } + cnt_short += 1; + } else { + fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(text); + for (size_t start = 0; start < text_len; start += max_length) { + size_t end = start + max_length; + if (end > text_len) { + end = text_len; + } + fast_tokenizer::core::Offset byte_offset; + converter.convert({start, end}, &byte_offset); + short_texts->emplace_back(text.data() + byte_offset.first, + byte_offset.second - byte_offset.first); + } + auto short_idx = cnt_short; + cnt_short += text_len / max_length; + if (text_len % max_length != 0) { + ++cnt_short; + } + std::vector temp_text_id(cnt_short - short_idx); + std::iota(temp_text_id.begin(), temp_text_id.end(), short_idx); + if (input_mapping->size() <= cnt_org) { + input_mapping->push_back(std::move(temp_text_id)); + } else { + (*input_mapping)[cnt_org].insert((*input_mapping)[cnt_org].end(), + temp_text_id.begin(), + temp_text_id.end()); + } + } + cnt_org += 1; + } +} + +void UIEModel::GetCandidateIdx( + const float *probs, int64_t batch_size, int64_t seq_len, + std::vector>> *candidate_idx_prob, + float threshold) const { + for (int i = 0; i < batch_size; ++i) { + candidate_idx_prob->push_back({}); + for (int j = 0; j < seq_len; ++j) { + if (probs[i * seq_len + j] > threshold) { + candidate_idx_prob->back().push_back({j, probs[i * seq_len + j]}); + } + } + } +} + +bool UIEModel::IdxProbCmp::operator()( + const std::pair &lhs, + const std::pair &rhs) const { + if (lhs.first.first == rhs.first.first) { + return lhs.second.first < rhs.second.first; + } + return lhs.first.first < rhs.first.first; +} + +void UIEModel::GetSpan(const std::vector &start_idx_prob, + const std::vector &end_idx_prob, + SPAN_SET *span_set) const { + size_t start_pointer = 0; + size_t end_pointer = 0; + size_t len_start = start_idx_prob.size(); + size_t len_end = end_idx_prob.size(); + while (start_pointer < len_start && end_pointer < len_end) { + if (start_idx_prob[start_pointer].first == + end_idx_prob[end_pointer].first) { + span_set->insert(std::make_pair(start_idx_prob[start_pointer], + end_idx_prob[end_pointer])); + ++start_pointer; + ++end_pointer; + } else if (start_idx_prob[start_pointer].first < + end_idx_prob[end_pointer].first) { + span_set->insert(std::make_pair(start_idx_prob[start_pointer], + end_idx_prob[end_pointer])); + ++start_pointer; + } else { + ++end_pointer; + } + } +} +void UIEModel::GetSpanIdxAndProbs( + const SPAN_SET &span_set, + const std::vector &offset_mapping, + std::vector *span_idxs, std::vector *probs) const { + auto first_sep_idx = + std::find_if(offset_mapping.begin() + 1, offset_mapping.end(), + [](const fast_tokenizer::core::Offset &offset) { + return offset == fast_tokenizer::core::Offset(0, 0); + }); + auto prompt_end_token_id = + std::distance(offset_mapping.begin(), first_sep_idx) - 1; + for (auto &&span_item : span_set) { + 
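+    // A span's score is the product of its start-token and end-token probabilities;
+    // the offset mapping converts token indices back to character positions, and
+    // spans that fall inside the prompt (cls-style results) are flagged via is_prompt.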
probs->push_back(span_item.first.second * span_item.second.second); + auto start_id = offset_mapping[span_item.first.first].first; + auto end_id = offset_mapping[span_item.second.first].second; + bool is_prompt = span_item.second.first <= prompt_end_token_id && + span_item.second.first > 0; + span_idxs->push_back({{start_id, end_id}, is_prompt}); + } +} + +void UIEModel::ConvertSpanToUIEResult( + const std::vector &texts, + const std::vector &prompts, + const std::vector> &span_idxs, + const std::vector> &probs, + std::vector> *results) const { + auto batch_size = texts.size(); + for (int i = 0; i < batch_size; ++i) { + std::vector result_list; + if (span_idxs[i].size() == 0) { + results->push_back({}); + continue; + } + auto &&text = texts[i]; + auto &&prompt = prompts[i]; + for (int j = 0; j < span_idxs[i].size(); ++j) { + auto start = span_idxs[i][j].offset_.first; + auto end = span_idxs[i][j].offset_.second; + std::string span_text; + std::vector offset_mapping; + if (span_idxs[i][j].is_prompt_) { + fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter( + prompt); + fast_tokenizer::core::Offset byte_offset; + converter.convert({start, end}, &byte_offset); + span_text = prompt.substr(byte_offset.first, + byte_offset.second - byte_offset.first); + // Indicate cls task + start = 0; + end = 0; + } else { + fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter( + text); + fast_tokenizer::core::Offset byte_offset; + converter.convert({start, end}, &byte_offset); + span_text = text.substr(byte_offset.first, + byte_offset.second - byte_offset.first); + } + result_list.emplace_back(start, end, probs[i][j], span_text); + } + results->push_back(result_list); + } +} + +void UIEModel::AutoJoiner(const std::vector &short_texts, + const std::vector> &input_mapping, + std::vector> *results) { + bool is_cls_task = false; + // 1. Detect if it's a cls task + for (auto &&short_result : *results) { + if (short_result.size() == 0) { + continue; + } else if (short_result[0].start_ == 0 && short_result[0].end_ == 0) { + is_cls_task = true; + break; + } else { + break; + } + } + // 2. 
Get the final result + std::vector> final_result; + if (is_cls_task) { + for (auto &&input_mapping_item : input_mapping) { + std::unordered_map> cls_options; + for (auto &&result_idx : input_mapping_item) { + if ((*results)[result_idx].size() == 0) { + continue; + } + auto &&text = (*results)[result_idx].front().text_; + auto &&probability = (*results)[result_idx].front().probability_; + if (cls_options.count(text) == 0) { + cls_options[text] = std::make_pair(1, probability); + } else { + cls_options[text].first += 1; + cls_options[text].second += probability; + } + } + std::vector result_list; + if (cls_options.size() > 0) { + auto max_iter = std::max_element( + cls_options.begin(), cls_options.end(), + [](const std::pair> &lhs, + const std::pair> &rhs) { + return lhs.second.second < rhs.second.second; + }); + result_list.emplace_back( + 0, 0, max_iter->second.second / max_iter->second.first, + max_iter->first); + } + final_result.push_back(result_list); + } + } else { + for (auto &&input_mapping_item : input_mapping) { + size_t offset = 0; + std::vector result_list; + for (auto &&result_idx : input_mapping_item) { + if (result_idx == 0) { + result_list = std::move((*results)[result_idx]); + offset += fast_tokenizer::utils::GetUnicodeLenFromUTF8( + short_texts[result_idx].c_str(), short_texts[result_idx].size()); + } else { + for (auto &&curr_result : (*results)[result_idx]) { + curr_result.start_ += offset; + curr_result.end_ += offset; + } + offset += fast_tokenizer::utils::GetUnicodeLenFromUTF8( + short_texts[result_idx].c_str(), short_texts[result_idx].size()); + result_list.insert(result_list.end(), (*results)[result_idx].begin(), + (*results)[result_idx].end()); + } + } + final_result.push_back(result_list); + } + } + *results = std::move(final_result); +} + +bool UIEModel::ConstructTextsAndPrompts( + const std::vector &raw_texts, const std::string &node_name, + const std::vector> node_prefix, + std::vector *input_texts, std::vector *prompts, + std::vector> *input_mapping_with_raw_texts, + std::vector> *input_mapping) { + size_t idx = 0; + if (node_prefix.empty()) { + for (int i = 0; i < raw_texts.size(); ++i) { + input_texts->push_back(raw_texts[i]); + prompts->push_back(DBC2SBC(node_name)); + input_mapping_with_raw_texts->push_back({idx}); + idx += 1; + } + } else { + for (int i = 0; i < raw_texts.size(); ++i) { + if (node_prefix[i].size() == 0) { + input_mapping_with_raw_texts->push_back({}); + } else { + for (auto &&pre : node_prefix[i]) { + input_texts->push_back(raw_texts[i]); + prompts->push_back(DBC2SBC(pre + node_name)); + } + auto prefix_len = node_prefix[i].size(); + input_mapping_with_raw_texts->push_back({}); + input_mapping_with_raw_texts->back().resize(prefix_len); + std::iota(input_mapping_with_raw_texts->back().begin(), + input_mapping_with_raw_texts->back().end(), idx); + idx += prefix_len; + } + } + } + + if (prompts->size() == 0) { + return false; + } + + // Shortten the input texts and prompts + auto max_prompt_iter = std::max_element( + prompts->begin(), prompts->end(), + [](const std::string &lhs, const std::string &rhs) { + auto lhs_ulen = fast_tokenizer::utils::GetUnicodeLenFromUTF8( + lhs.c_str(), lhs.length()); + auto rhs_ulen = fast_tokenizer::utils::GetUnicodeLenFromUTF8( + rhs.c_str(), rhs.length()); + return lhs_ulen < rhs_ulen; + }); + auto max_prompt_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8( + max_prompt_iter->c_str(), max_prompt_iter->length()); + auto max_predict_len = max_length_ - 3 - max_prompt_len; + + std::vector short_texts; + 
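+  // Texts longer than max_predict_len are split into chunks below; input_mapping
+  // records which chunk indices belong to each original (text, prompt) pair.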
AutoSplitter(*input_texts, max_predict_len, &short_texts, input_mapping); + + std::vector short_texts_prompts; + for (int i = 0; i < input_mapping->size(); ++i) { + short_texts_prompts.insert(short_texts_prompts.end(), + (*input_mapping)[i].size(), (*prompts)[i]); + } + (*input_texts) = std::move(short_texts); + (*prompts) = std::move(short_texts_prompts); + return true; +} + +void UIEModel::Preprocess( + const std::vector &input_texts, + const std::vector &prompts, + std::vector *encodings, + std::vector *inputs) { + // 1. Tokenize the short texts and short prompts + std::vector text_pair_input; + for (int i = 0; i < input_texts.size(); ++i) { + text_pair_input.emplace_back( + std::pair(prompts[i], input_texts[i])); + } + tokenizer_.EncodeBatchStrings(text_pair_input, encodings); + // 2. Construct the input vector tensor + // 2.1 Allocate input tensor + int64_t batch_size = input_texts.size(); + int64_t seq_len = 0; + if (batch_size > 0) { + seq_len = (*encodings)[0].GetIds().size(); + } + inputs->resize(NumInputsOfRuntime()); + for (int i = 0; i < NumInputsOfRuntime(); ++i) { + (*inputs)[i].Allocate({batch_size, seq_len}, ultrainfer::FDDataType::INT64, + InputInfoOfRuntime(i).name); + } + + // 2.2 Set the value of data + size_t start = 0; + int64_t *input_ids_ptr = + reinterpret_cast((*inputs)[0].MutableData()); + int64_t *type_ids_ptr = + reinterpret_cast((*inputs)[1].MutableData()); + int64_t *pos_ids_ptr = + reinterpret_cast((*inputs)[2].MutableData()); + int64_t *attn_mask_ptr = + reinterpret_cast((*inputs)[3].MutableData()); + + for (int i = 0; i < encodings->size(); ++i) { + auto &&curr_input_ids = (*encodings)[i].GetIds(); + auto &&curr_type_ids = (*encodings)[i].GetTypeIds(); + auto &&curr_attn_mask = (*encodings)[i].GetAttentionMask(); + + std::copy(curr_input_ids.begin(), curr_input_ids.end(), + input_ids_ptr + start); + std::copy(curr_type_ids.begin(), curr_type_ids.end(), type_ids_ptr + start); + std::iota(pos_ids_ptr + start, pos_ids_ptr + start + seq_len, 0); + std::copy(curr_attn_mask.begin(), curr_attn_mask.end(), + attn_mask_ptr + start); + start += seq_len; + } +} + +void UIEModel::Postprocess( + const std::vector &outputs, + const std::vector &encodings, + const std::vector &short_input_texts, + const std::vector &short_prompts, + const std::vector> &input_mapping_with_short_text, + std::vector> *results) { + auto *start_prob = reinterpret_cast(outputs[0].Data()); + auto *end_prob = reinterpret_cast(outputs[1].Data()); + + std::vector>> start_candidate_idx_prob, + end_candidate_idx_prob; + GetCandidateIdx(start_prob, outputs[0].shape[0], outputs[0].shape[1], + &start_candidate_idx_prob, position_prob_); + GetCandidateIdx(end_prob, outputs[1].shape[0], outputs[1].shape[1], + &end_candidate_idx_prob, position_prob_); + + std::vector> offset_mapping; + for (int i = 0; i < encodings.size(); ++i) { + auto &&curr_offsets = encodings[i].GetOffsets(); + offset_mapping.push_back(curr_offsets); + } + + SPAN_SET span_set; + auto batch_size = outputs[0].shape[0]; + std::vector> probs(batch_size); + std::vector> span_idxs(batch_size); + for (int i = 0; i < batch_size; ++i) { + GetSpan(start_candidate_idx_prob[i], end_candidate_idx_prob[i], &span_set); + GetSpanIdxAndProbs(span_set, offset_mapping[i], &span_idxs[i], &probs[i]); + span_set.clear(); + } + ConvertSpanToUIEResult(short_input_texts, short_prompts, span_idxs, probs, + results); + AutoJoiner(short_input_texts, input_mapping_with_short_text, results); +} + +void UIEModel::ConstructChildPromptPrefix( + const std::vector> 
&input_mapping_with_raw_texts, + const std::vector> &results_list, + std::vector> *prefix) { + prefix->resize(input_mapping_with_raw_texts.size()); + for (int i = 0; i < input_mapping_with_raw_texts.size(); ++i) { + auto &&input_mapping_item = input_mapping_with_raw_texts[i]; + for (auto &&idx : input_mapping_item) { + for (int j = 0; j < results_list[idx].size(); ++j) { + std::string prefix_str; + if (schema_language_ == SchemaLanguage::ZH) { + // Note(zhoushunjie): It means "of" in Chinese. + prefix_str = results_list[idx][j].text_ + "\xe7\x9a\x84"; + } else { + prefix_str = " of " + results_list[idx][j].text_; + } + (*prefix)[i].push_back(prefix_str); + } + } + } +} + +void UIEModel::ConstructChildRelations( + const std::vector> &old_relations, + const std::vector> &input_mapping_with_raw_texts, + const std::vector> &results_list, + const std::string &node_name, + std::vector>> + *results, + std::vector> *new_relations) { + new_relations->resize(input_mapping_with_raw_texts.size()); + if (old_relations.size() == 0) { + for (int i = 0; i < input_mapping_with_raw_texts.size(); ++i) { + auto &&input_mapping_item = input_mapping_with_raw_texts[i]; + auto &curr_result = (*results)[i]; + for (auto &&idx : input_mapping_item) { + if (results_list[idx].size() == 0) { + continue; + } + if (curr_result.count(node_name) == 0) { + curr_result[node_name] = results_list[idx]; + } else { + curr_result[node_name].insert(curr_result[node_name].end(), + results_list[idx].begin(), + results_list[idx].end()); + } + } + if (curr_result.count(node_name) > 0) { + for (auto &&curr_result_ref : curr_result[node_name]) { + (*new_relations)[i].push_back(&curr_result_ref); + } + } + } + } else { + auto &curr_relations = old_relations; + for (int i = 0; i < input_mapping_with_raw_texts.size(); ++i) { + auto &&input_mapping_item = input_mapping_with_raw_texts[i]; + for (int j = 0; j < input_mapping_item.size(); ++j) { + auto idx = input_mapping_item[j]; + if (results_list[idx].size() == 0) { + continue; + } + if (curr_relations[i][j]->relation_.count(node_name) == 0) { + curr_relations[i][j]->relation_[node_name] = results_list[idx]; + } else { + auto &curr_result = curr_relations[i][j]->relation_[node_name]; + curr_result.insert(curr_result.end(), results_list[idx].begin(), + results_list[idx].end()); + } + } + } + for (int i = 0; i < curr_relations.size(); ++i) { + for (int j = 0; j < curr_relations[i].size(); ++j) { + if (curr_relations[i][j]->relation_.count(node_name)) { + auto &curr_relation = curr_relations[i][j]->relation_[node_name]; + for (auto &&curr_result_ref : curr_relation) { + (*new_relations)[i].push_back(&curr_result_ref); + } + } + } + } + } +} + +void UIEModel::Predict( + const std::vector &texts, + std::vector>> + *results) { + std::queue nodes; + for (auto &node : schema_->root_->children_) { + nodes.push(node); + } + results->resize(texts.size()); + while (!nodes.empty()) { + auto node = nodes.front(); + nodes.pop(); + std::vector> input_mapping_with_raw_texts; + std::vector> input_mapping_with_short_text; + std::vector short_input_texts; + std::vector short_prompts; + // 1. Construct texts and prompts from raw text + bool has_prompt = ConstructTextsAndPrompts( + texts, node.name_, node.prefix_, &short_input_texts, &short_prompts, + &input_mapping_with_raw_texts, &input_mapping_with_short_text); + std::vector> results_list; + if (has_prompt) { + // 2. 
Convert texts and prompts to FDTensor + std::vector inputs; + std::vector encodings; + Preprocess(short_input_texts, short_prompts, &encodings, &inputs); + + std::vector> inputs_vec(NumInputsOfRuntime()); + int encoding_size = encodings.size(); + std::vector num_or_sections; + for (int i = 0; i < encoding_size; i += batch_size_) { + int actual_batch_size = (std::min)(batch_size_, encoding_size - i); + num_or_sections.push_back(actual_batch_size); + } + for (int i = 0; i < NumInputsOfRuntime(); ++i) { + function::Split(inputs[i], num_or_sections, &inputs_vec[i]); + } + + // 3. Infer + std::vector outputs(NumOutputsOfRuntime()); + std::vector outputs0, outputs1; + + for (int i = 0; i < inputs_vec[0].size(); ++i) { + std::vector curr_inputs(NumInputsOfRuntime()); + std::vector curr_outputs(NumOutputsOfRuntime()); + for (int j = 0; j < NumInputsOfRuntime(); ++j) { + curr_inputs[j] = std::move(inputs_vec[j][i]); + curr_inputs[j].name = inputs[j].name; + } + if (!Infer(curr_inputs, &curr_outputs)) { + FDERROR << "Failed to inference while using model:" << ModelName() + << "." << std::endl; + } + outputs0.push_back(curr_outputs[0]); + outputs1.push_back(curr_outputs[1]); + } + function::Concat(outputs0, &outputs[0]); + function::Concat(outputs1, &outputs[1]); + // 4. Convert FDTensor to UIEResult + Postprocess(outputs, encodings, short_input_texts, short_prompts, + input_mapping_with_short_text, &results_list); + } + // 5. Construct the new relation of the UIEResult + std::vector> relations; + ConstructChildRelations(node.relations_, input_mapping_with_raw_texts, + results_list, node.name_, results, &relations); + + // 6. Construct the next prompt prefix + std::vector> prefix(texts.size()); + ConstructChildPromptPrefix(input_mapping_with_raw_texts, results_list, + &prefix); + for (auto &node_child : node.children_) { + node_child.relations_ = relations; + node_child.prefix_ = prefix; + nodes.push(node_child); + } + } +} + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/uie/model.h b/libs/ultrainfer/ultrainfer/text/uie/model.h new file mode 100755 index 0000000000..ef2b7107f5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/uie/model.h @@ -0,0 +1,210 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
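+// Usage sketch (model/vocab paths and the schema below are placeholders, the
+// remaining constructor arguments keep their defaults):
+//   ultrainfer::text::UIEModel model(
+//       "uie/inference.pdmodel", "uie/inference.pdiparams", "uie/vocab.txt",
+//       /*position_prob=*/0.5, /*max_length=*/128,
+//       std::vector<std::string>{"时间", "选手"}, /*batch_size=*/1);
+//   std::vector<std::unordered_map<std::string,
+//                                  std::vector<ultrainfer::text::UIEResult>>>
+//       results;
+//   model.Predict({"2月8日上午北京冬奥会自由式滑雪女子大跳台决赛。"}, &results);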
+ +#pragma once + +#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h" +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include +#include +#include +#include +#include + +using namespace paddlenlp; + +namespace ultrainfer { +namespace text { + +struct ULTRAINFER_DECL UIEResult { + size_t start_; + size_t end_; + double probability_; + std::string text_; + std::unordered_map> relation_; + UIEResult() = default; + UIEResult(size_t start, size_t end, double probability, std::string text) + : start_(start), end_(end), probability_(probability), text_(text) {} + std::string Str() const; +}; + +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &os, + const UIEResult &result); +ULTRAINFER_DECL std::ostream &operator<<( + std::ostream &os, + const std::vector>> + &results); + +struct ULTRAINFER_DECL SchemaNode { + std::string name_; + std::vector> prefix_; + std::vector> relations_; + std::vector children_; + SchemaNode() = default; + SchemaNode(const SchemaNode &) = default; + explicit SchemaNode(const std::string &name, + const std::vector &children = {}) + : name_(name), children_(children) {} + void AddChild(const std::string &schema) { children_.emplace_back(schema); } + void AddChild(const SchemaNode &schema) { children_.push_back(schema); } + void AddChild(const std::string &schema, + const std::vector &children) { + SchemaNode schema_node(schema); + for (auto &child : children) { + schema_node.children_.emplace_back(child); + } + children_.emplace_back(schema_node); + } + void AddChild(const std::string &schema, + const std::vector &children) { + SchemaNode schema_node(schema); + schema_node.children_ = children; + children_.emplace_back(schema_node); + } +}; + +enum SchemaLanguage { + ZH, // Chinese + EN // English +}; + +struct Schema { + explicit Schema(const std::string &schema, const std::string &name = "root"); + explicit Schema(const std::vector &schema_list, + const std::string &name = "root"); + explicit Schema(const std::vector &schema_list, + const std::string &name = "root"); + explicit Schema(const SchemaNode &schema, const std::string &name = "root"); + +private: + void CreateRoot(const std::string &name); + std::unique_ptr root_; + friend class UIEModel; +}; + +struct ULTRAINFER_DECL UIEModel : public UltraInferModel { +public: + UIEModel(const std::string &model_file, const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const std::vector &schema, + int batch_size, + const ultrainfer::RuntimeOption &custom_option = + ultrainfer::RuntimeOption(), + const ultrainfer::ModelFormat &model_format = + ultrainfer::ModelFormat::PADDLE, + SchemaLanguage schema_language = SchemaLanguage::ZH); + UIEModel(const std::string &model_file, const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const SchemaNode &schema, int batch_size, + const ultrainfer::RuntimeOption &custom_option = + ultrainfer::RuntimeOption(), + const ultrainfer::ModelFormat &model_format = + ultrainfer::ModelFormat::PADDLE, + SchemaLanguage schema_language = SchemaLanguage::ZH); + UIEModel(const std::string &model_file, const std::string ¶ms_file, + const std::string &vocab_file, float position_prob, + size_t max_length, const std::vector &schema, + int batch_size, + const ultrainfer::RuntimeOption &custom_option = + ultrainfer::RuntimeOption(), + const ultrainfer::ModelFormat &model_format = + ultrainfer::ModelFormat::PADDLE, + SchemaLanguage schema_language = 
SchemaLanguage::ZH); + virtual std::string ModelName() const { return "UIEModel"; } + void SetSchema(const std::vector &schema); + void SetSchema(const std::vector &schema); + void SetSchema(const SchemaNode &schema); + + bool ConstructTextsAndPrompts( + const std::vector &raw_texts, const std::string &node_name, + const std::vector> node_prefix, + std::vector *input_texts, std::vector *prompts, + std::vector> *input_mapping_with_raw_texts, + std::vector> *input_mapping_with_short_text); + void Preprocess(const std::vector &input_texts, + const std::vector &prompts, + std::vector *encodings, + std::vector *inputs); + void Postprocess( + const std::vector &outputs, + const std::vector &encodings, + const std::vector &short_input_texts, + const std::vector &short_prompts, + const std::vector> &input_mapping_with_short_text, + std::vector> *results); + void ConstructChildPromptPrefix( + const std::vector> &input_mapping_with_raw_texts, + const std::vector> &results_list, + std::vector> *prefix); + void ConstructChildRelations( + const std::vector> &old_relations, + const std::vector> &input_mapping_with_raw_texts, + const std::vector> &results_list, + const std::string &node_name, + std::vector>> + *results, + std::vector> *new_relations); + void + Predict(const std::vector &texts, + std::vector>> + *results); + +protected: + using IDX_PROB = std::pair; + struct IdxProbCmp { + bool operator()(const std::pair &lhs, + const std::pair &rhs) const; + }; + using SPAN_SET = std::set, IdxProbCmp>; + struct SpanIdx { + fast_tokenizer::core::Offset offset_; + bool is_prompt_; + }; + void SetValidBackend(); + bool Initialize(); + void AutoSplitter(const std::vector &texts, size_t max_length, + std::vector *short_texts, + std::vector> *input_mapping); + void AutoJoiner(const std::vector &short_texts, + const std::vector> &input_mapping, + std::vector> *results); + // Get idx of the last dimension in probability arrays, which is greater than + // a limitation. + void GetCandidateIdx(const float *probs, int64_t batch_size, int64_t seq_len, + std::vector> *candidate_idx_prob, + float threshold = 0.5) const; + void GetSpan(const std::vector &start_idx_prob, + const std::vector &end_idx_prob, + SPAN_SET *span_set) const; + void GetSpanIdxAndProbs( + const SPAN_SET &span_set, + const std::vector &offset_mapping, + std::vector *span_idxs, std::vector *probs) const; + void + ConvertSpanToUIEResult(const std::vector &texts, + const std::vector &prompts, + const std::vector> &span_idxs, + const std::vector> &probs, + std::vector> *results) const; + std::unique_ptr schema_; + size_t max_length_; + float position_prob_; + int batch_size_; + SchemaLanguage schema_language_; + fast_tokenizer::tokenizers_impl::ErnieFastTokenizer tokenizer_; +}; + +} // namespace text +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/text/uie/uie_pybind.cc b/libs/ultrainfer/ultrainfer/text/uie/uie_pybind.cc new file mode 100755 index 0000000000..a7a3ff3fc3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/text/uie/uie_pybind.cc @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace py = pybind11; + +namespace ultrainfer { + +void BindUIE(pybind11::module &m) { + py::class_(m, "SchemaNode") + .def(py::init<>()) + .def(py::init>(), + py::arg("name"), py::arg("children")) + .def_readwrite("name", &text::SchemaNode::name_) + .def_readwrite("prefix", &text::SchemaNode::prefix_) + .def_readwrite("relations", &text::SchemaNode::relations_) + .def_readwrite("children", &text::SchemaNode::children_); + + py::enum_(m, "SchemaLanguage", py::arithmetic(), + "The language of schema.") + .value("ZH", text::SchemaLanguage::ZH) + .value("EN", text::SchemaLanguage::EN); + + py::class_(m, "UIEModel") + .def(py::init, int, RuntimeOption, ModelFormat, + text::SchemaLanguage>(), + py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), + py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), + py::arg("custom_option") = ultrainfer::RuntimeOption(), + py::arg("model_format") = ultrainfer::ModelFormat::PADDLE, + py::arg("schema_language") = text::SchemaLanguage::ZH) + .def(py::init, int, RuntimeOption, + ModelFormat, text::SchemaLanguage>(), + py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), + py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), + py::arg("custom_option") = ultrainfer::RuntimeOption(), + py::arg("model_format") = ultrainfer::ModelFormat::PADDLE, + py::arg("schema_language") = text::SchemaLanguage::ZH) + .def(py::init(), + py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), + py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), + py::arg("custom_option") = ultrainfer::RuntimeOption(), + py::arg("model_format") = ultrainfer::ModelFormat::PADDLE, + py::arg("schema_language") = text::SchemaLanguage::ZH) + .def("set_schema", + static_cast &)>(&text::UIEModel::SetSchema), + py::arg("schema")) + .def("set_schema", + static_cast &)>( + &text::UIEModel::SetSchema), + py::arg("schema")) + .def("set_schema", + static_cast( + &text::UIEModel::SetSchema), + py::arg("schema")) + .def( + "predict", + [](text::UIEModel &self, const std::vector &texts) { + std::vector< + std::unordered_map>> + results; + self.Predict(texts, &results); + return results; + }, + py::arg("text")); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/ultrainfer_model.cc b/libs/ultrainfer/ultrainfer/ultrainfer_model.cc new file mode 100755 index 0000000000..0fc3f3d7a5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/ultrainfer_model.cc @@ -0,0 +1,517 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/ultrainfer_model.h" + +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { + +std::string Str(const std::vector &backends) { + std::ostringstream oss; + if (backends.size() == 0) { + oss << "[]"; + return oss.str(); + } + oss << "[ " << backends[0]; + for (int i = 1; i < backends.size(); ++i) { + oss << " ," << backends[i]; + } + oss << " ]"; + return oss.str(); +} + +bool CheckBackendSupported(const std::vector &backends, + Backend backend) { + for (size_t i = 0; i < backends.size(); ++i) { + if (backends[i] == backend) { + return true; + } + } + return false; +} + +bool UltraInferModel::IsSupported(const std::vector &backends, + Backend backend) { +#ifdef ENABLE_BENCHMARK + if (runtime_option.benchmark_option.enable_profile) { + FDWARNING << "In benchmark mode, we don't check to see if " + << "the backend [" << backend + << "] is supported for current model!" << std::endl; + return true; + } else if (!runtime_option.enable_valid_backend_check) { + FDWARNING << "Checking for valid backend is disable, we don't" + << " check to see if the backend [" << backend + << "] is supported for current model!" << std::endl; + return true; + } + return CheckBackendSupported(backends, backend); +#else + if (!runtime_option.enable_valid_backend_check) { + FDWARNING << "Checking for valid backend is disable, we don't" + << " check to see if the backend [" << backend + << "] is supported for current model!" << std::endl; + return true; + } + return CheckBackendSupported(backends, backend); +#endif +} + +bool UltraInferModel::InitRuntimeWithSpecifiedBackend() { + if (!IsBackendAvailable(runtime_option.backend)) { + FDERROR << runtime_option.backend + << " is not compiled with current UltraInfer library." << std::endl; + return false; + } + + bool use_gpu = (runtime_option.device == Device::GPU); + bool use_ipu = (runtime_option.device == Device::IPU); + bool use_rknpu = (runtime_option.device == Device::RKNPU); + bool use_horizon = (runtime_option.device == Device::SUNRISENPU); + bool use_sophgotpu = (runtime_option.device == Device::SOPHGOTPUD); + bool use_timvx = (runtime_option.device == Device::TIMVX); + bool use_ascend = (runtime_option.device == Device::ASCEND); + bool use_directml = (runtime_option.device == Device::DIRECTML); + bool use_kunlunxin = (runtime_option.device == Device::KUNLUNXIN); + + if (use_gpu) { + if (!IsSupported(valid_gpu_backends, runtime_option.backend)) { + FDERROR << "The valid gpu backends of model " << ModelName() << " are " + << Str(valid_gpu_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } else if (use_rknpu) { + if (!IsSupported(valid_rknpu_backends, runtime_option.backend)) { + FDERROR << "The valid rknpu backends of model " << ModelName() << " are " + << Str(valid_rknpu_backends) << ", " << runtime_option.backend + << " is not supported." 
<< std::endl; + return false; + } + } else if (use_horizon) { + if (!IsSupported(valid_horizon_backends, runtime_option.backend)) { + FDERROR << "The valid horizon backends of model " << ModelName() + << " are " << Str(valid_horizon_backends) << ", " + << runtime_option.backend << " is not supported." << std::endl; + return false; + } + } else if (use_sophgotpu) { + if (!IsSupported(valid_sophgonpu_backends, runtime_option.backend)) { + FDERROR << "The valid sophgo backends of model " << ModelName() << " are " + << Str(valid_sophgonpu_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } else if (use_timvx) { + if (!IsSupported(valid_timvx_backends, runtime_option.backend)) { + FDERROR << "The valid timvx backends of model " << ModelName() << " are " + << Str(valid_timvx_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } else if (use_ascend) { + if (!IsSupported(valid_ascend_backends, runtime_option.backend)) { + FDERROR << "The valid ascend backends of model " << ModelName() << " are " + << Str(valid_ascend_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } else if (use_directml) { + if (!IsSupported(valid_directml_backends, runtime_option.backend)) { + FDERROR << "The valid directml backends of model " << ModelName() + << " are " << Str(valid_directml_backends) << ", " + << runtime_option.backend << " is not supported." << std::endl; + return false; + } + } else if (use_kunlunxin) { + if (!IsSupported(valid_kunlunxin_backends, runtime_option.backend)) { + FDERROR << "The valid kunlunxin backends of model " << ModelName() + << " are " << Str(valid_kunlunxin_backends) << ", " + << runtime_option.backend << " is not supported." << std::endl; + return false; + } + } else if (use_ipu) { + if (!IsSupported(valid_ipu_backends, runtime_option.backend)) { + FDERROR << "The valid ipu backends of model " << ModelName() << " are " + << Str(valid_ipu_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } else { + if (!IsSupported(valid_cpu_backends, runtime_option.backend)) { + FDERROR << "The valid cpu backends of model " << ModelName() << " are " + << Str(valid_cpu_backends) << ", " << runtime_option.backend + << " is not supported." << std::endl; + return false; + } + } + + runtime_ = std::shared_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; +} + +bool UltraInferModel::InitRuntimeWithSpecifiedDevice() { + if (runtime_option.device == Device::CPU) { + return CreateCpuBackend(); + } else if (runtime_option.device == Device::GPU) { +#ifdef WITH_GPU + return CreateGpuBackend(); +#else + FDERROR << "The compiled UltraInfer library doesn't support GPU now." 
+ << std::endl; + return false; +#endif + } else if (runtime_option.device == Device::RKNPU) { + return CreateRKNPUBackend(); + } else if (runtime_option.device == Device::SUNRISENPU) { + return CreateHorizonBackend(); + } else if (runtime_option.device == Device::TIMVX) { + return CreateTimVXBackend(); + } else if (runtime_option.device == Device::ASCEND) { + return CreateASCENDBackend(); + } else if (runtime_option.device == Device::DIRECTML) { + return CreateDirectMLBackend(); + } else if (runtime_option.device == Device::KUNLUNXIN) { + return CreateKunlunXinBackend(); + } else if (runtime_option.device == Device::SOPHGOTPUD) { + return CreateSophgoNPUBackend(); + } else if (runtime_option.device == Device::IPU) { +#ifdef WITH_IPU + return CreateIpuBackend(); +#else + FDERROR << "The compiled UltraInfer library doesn't support IPU now." + << std::endl; + return false; +#endif + } + FDERROR << "Only support " + "CPU/GPU/IPU/RKNPU/HORIZONNPU/TIMVX/KunlunXin/ASCEND/DirectML now." + << std::endl; + return false; +} + +bool UltraInferModel::InitRuntime() { + if (runtime_initialized_) { + FDERROR << "The model is already initialized, cannot be initliazed again." + << std::endl; + return false; + } + if (runtime_option.backend != Backend::UNKNOWN) { + return InitRuntimeWithSpecifiedBackend(); + } + + return InitRuntimeWithSpecifiedDevice(); +} + +bool UltraInferModel::CreateCpuBackend() { + if (valid_cpu_backends.size() == 0) { + FDERROR << "There's no valid cpu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_cpu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_cpu_backends[i])) { + continue; + } + runtime_option.backend = valid_cpu_backends[i]; + runtime_ = std::shared_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid backend for model: " << ModelName() << std::endl; + return false; +} + +bool UltraInferModel::CreateGpuBackend() { + if (valid_gpu_backends.empty()) { + FDERROR << "There's no valid gpu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_gpu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_gpu_backends[i])) { + continue; + } + runtime_option.backend = valid_gpu_backends[i]; + runtime_ = std::shared_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Cannot find an available gpu backend to load this model." + << std::endl; + return false; +} + +bool UltraInferModel::CreateRKNPUBackend() { + if (valid_rknpu_backends.empty()) { + FDERROR << "There's no valid npu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_rknpu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_rknpu_backends[i])) { + continue; + } + runtime_option.backend = valid_rknpu_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Cannot find an available npu backend to load this model." 
+ << std::endl; + return false; +} + +bool UltraInferModel::CreateHorizonBackend() { + if (valid_horizon_backends.empty()) { + FDERROR << "There's no valid npu backends for model: " << ModelName() + << std::endl; + return false; + } + for (size_t i = 0; i < valid_horizon_backends.size(); ++i) { + if (!IsBackendAvailable(valid_horizon_backends[i])) { + continue; + } + runtime_option.backend = valid_horizon_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Cannot find an available npu backend to load this model." + << std::endl; + return false; +} +bool UltraInferModel::CreateSophgoNPUBackend() { + if (valid_sophgonpu_backends.empty()) { + FDERROR << "There's no valid npu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_sophgonpu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_sophgonpu_backends[i])) { + continue; + } + runtime_option.backend = valid_sophgonpu_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Cannot find an available npu backend to load this model." + << std::endl; + return false; +} + +bool UltraInferModel::CreateTimVXBackend() { + if (valid_timvx_backends.size() == 0) { + FDERROR << "There's no valid timvx backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_timvx_backends.size(); ++i) { + if (!IsBackendAvailable(valid_timvx_backends[i])) { + continue; + } + runtime_option.backend = valid_timvx_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid backend for model: " << ModelName() << std::endl; + return false; +} + +bool UltraInferModel::CreateKunlunXinBackend() { + if (valid_kunlunxin_backends.size() == 0) { + FDERROR << "There's no valid KunlunXin backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_kunlunxin_backends.size(); ++i) { + if (!IsBackendAvailable(valid_kunlunxin_backends[i])) { + continue; + } + runtime_option.backend = valid_kunlunxin_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid backend for model: " << ModelName() << std::endl; + return false; +} + +bool UltraInferModel::CreateASCENDBackend() { + if (valid_ascend_backends.size() == 0) { + FDERROR << "There's no valid ascend backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_ascend_backends.size(); ++i) { + if (!IsBackendAvailable(valid_ascend_backends[i])) { + continue; + } + runtime_option.backend = valid_ascend_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid backend for model: " << ModelName() << std::endl; + return false; +} + +bool UltraInferModel::CreateDirectMLBackend() { + if (valid_directml_backends.size() == 0) { + FDERROR << "There's no valid directml backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < 
valid_directml_backends.size(); ++i) { + if (!IsBackendAvailable(valid_directml_backends[i])) { + continue; + } + runtime_option.backend = valid_directml_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid directml backend for model: " << ModelName() + << std::endl; + return false; +} + +bool UltraInferModel::CreateIpuBackend() { + if (valid_ipu_backends.size() == 0) { + FDERROR << "There's no valid ipu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_ipu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_ipu_backends[i])) { + continue; + } + runtime_option.backend = valid_ipu_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid backend for model: " << ModelName() << std::endl; + return false; +} + +bool UltraInferModel::Infer(std::vector &input_tensors, + std::vector *output_tensors) { + TimeCounter tc; + if (enable_record_time_of_runtime_) { + tc.Start(); + } + auto ret = runtime_->Infer(input_tensors, output_tensors); + if (enable_record_time_of_runtime_) { + tc.End(); + if (time_of_runtime_.size() > 50000) { + FDWARNING << "There are already 50000 records of runtime, will force to " + "disable record time of runtime now." + << std::endl; + enable_record_time_of_runtime_ = false; + } + time_of_runtime_.push_back(tc.Duration()); + } + + return ret; +} + +bool UltraInferModel::Infer() { + return Infer(reused_input_tensors_, &reused_output_tensors_); +} + +std::map UltraInferModel::PrintStatisInfoOfRuntime() { + std::map statis_info_of_runtime_dict; + + if (time_of_runtime_.size() < 10) { + FDWARNING << "PrintStatisInfoOfRuntime require the runtime ran 10 times at " + "least, but now you only ran " + << time_of_runtime_.size() << " times." << std::endl; + } + double warmup_time = 0.0; + double remain_time = 0.0; + int warmup_iter = time_of_runtime_.size() / 5; + for (size_t i = 0; i < time_of_runtime_.size(); ++i) { + if (i < warmup_iter) { + warmup_time += time_of_runtime_[i]; + } else { + remain_time += time_of_runtime_[i]; + } + } + double avg_time = remain_time / (time_of_runtime_.size() - warmup_iter); + std::cout << "============= Runtime Statis Info(" << ModelName() + << ") =============" << std::endl; + std::cout << "Total iterations: " << time_of_runtime_.size() << std::endl; + std::cout << "Total time of runtime: " << warmup_time + remain_time << "s." + << std::endl; + std::cout << "Warmup iterations: " << warmup_iter << std::endl; + std::cout << "Total time of runtime in warmup step: " << warmup_time << "s." + << std::endl; + std::cout << "Average time of runtime exclude warmup step: " + << avg_time * 1000 << "ms." 
<< std::endl; + + statis_info_of_runtime_dict["total_time"] = warmup_time + remain_time; + statis_info_of_runtime_dict["warmup_time"] = warmup_time; + statis_info_of_runtime_dict["remain_time"] = remain_time; + statis_info_of_runtime_dict["warmup_iter"] = warmup_iter; + statis_info_of_runtime_dict["avg_time"] = avg_time; + statis_info_of_runtime_dict["iterations"] = time_of_runtime_.size(); + + return statis_info_of_runtime_dict; +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/ultrainfer_model.h b/libs/ultrainfer/ultrainfer/ultrainfer_model.h new file mode 100755 index 0000000000..d204c700f9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/ultrainfer_model.h @@ -0,0 +1,189 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/runtime.h" + +namespace ultrainfer { + +/*! @brief Base model object for all the vision models + */ +class ULTRAINFER_DECL UltraInferModel { +public: + /// Get model's name + virtual std::string ModelName() const { return "NameUndefined"; } + + /** \brief Inference the model by the runtime. This interface is included in + * the `Predict()` function, so we don't call `Infer()` directly in most + * common situation + */ + virtual bool Infer(std::vector &input_tensors, + std::vector *output_tensors); + + /** \brief Inference the model by the runtime. This interface is using class + * member reused_input_tensors_ to do inference and writing results to + * reused_output_tensors_ + */ + virtual bool Infer(); + + RuntimeOption runtime_option; + /** \brief Model's valid cpu backends. This member defined all the cpu + * backends have successfully tested for the model + */ + std::vector valid_cpu_backends = {Backend::ORT}; + /** Model's valid gpu backends. This member defined all the gpu backends have + * successfully tested for the model + */ + std::vector valid_gpu_backends = {Backend::ORT}; + /** Model's valid ipu backends. This member defined all the ipu backends have + * successfully tested for the model + */ + std::vector valid_ipu_backends = {}; + /** Model's valid timvx backends. This member defined all the timvx backends + * have successfully tested for the model + */ + std::vector valid_timvx_backends = {}; + /** Model's valid directml backends. This member defined all the onnxruntime + * directml backends have successfully tested for the model + */ + std::vector valid_directml_backends = {}; + /** Model's valid ascend backends. This member defined all the cann backends + * have successfully tested for the model + */ + std::vector valid_ascend_backends = {}; + /** Model's valid KunlunXin xpu backends. This member defined all the + * KunlunXin xpu backends have successfully tested for the model + */ + std::vector valid_kunlunxin_backends = {}; + /** Model's valid hardware backends. 
This member defined all the gpu backends + * have successfully tested for the model + */ + std::vector valid_rknpu_backends = {}; + /** Model's valid hardware backends. This member defined all the sophgo npu + * backends have successfully tested for the model + */ + std::vector valid_horizon_backends = {}; + std::vector valid_sophgonpu_backends = {}; + + /// Get number of inputs for this model + virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); } + /// Get number of outputs for this model + virtual int NumOutputsOfRuntime() { return runtime_->NumOutputs(); } + /// Get input information for this model + virtual TensorInfo InputInfoOfRuntime(int index) { + return runtime_->GetInputInfo(index); + } + /// Get output information for this model + virtual TensorInfo OutputInfoOfRuntime(int index) { + return runtime_->GetOutputInfo(index); + } + /// Check if the model is initialized successfully + virtual bool Initialized() const { + return runtime_initialized_ && initialized; + } + + /** \brief This is a debug interface, used to record the time of runtime + * (backend + h2d + d2h) + * + * example code @code + * auto model = ultrainfer::vision::PPYOLOE("model.pdmodel", + * "model.pdiparams", "infer_cfg.yml"); if (!model.Initialized()) { std::cerr + * << "Failed to initialize." << std::endl; return -1; + * } + * model.EnableRecordTimeOfRuntime(); + * cv::Mat im = cv::imread("test.jpg"); + * for (auto i = 0; i < 1000; ++i) { + * ultrainfer::vision::DetectionResult result; + * model.Predict(&im, &result); + * } + * model.PrintStatisInfoOfRuntime(); + * @endcode After called the `PrintStatisInfoOfRuntime()`, the statistical + * information of runtime will be printed in the console + */ + virtual void EnableRecordTimeOfRuntime() { + time_of_runtime_.clear(); + std::vector().swap(time_of_runtime_); + enable_record_time_of_runtime_ = true; + } + + /** \brief Disable to record the time of runtime, see + * `EnableRecordTimeOfRuntime()` for more detail + */ + virtual void DisableRecordTimeOfRuntime() { + enable_record_time_of_runtime_ = false; + } + + /** \brief Print the statistic information of runtime in the console, see + * function `EnableRecordTimeOfRuntime()` for more detail + */ + virtual std::map PrintStatisInfoOfRuntime(); + + /** \brief Check if the `EnableRecordTimeOfRuntime()` method is enabled. + */ + virtual bool EnabledRecordTimeOfRuntime() { + return enable_record_time_of_runtime_; + } + /** \brief Get profile time of Runtime after the profile process is done. + */ + virtual double GetProfileTime() { return runtime_->GetProfileTime(); } + /** \brief Release reused input/output buffers + */ + virtual void ReleaseReusedBuffer() { + std::vector().swap(reused_input_tensors_); + std::vector().swap(reused_output_tensors_); + } + + virtual ultrainfer::Runtime *CloneRuntime() { return runtime_->Clone(); } + + virtual bool SetRuntime(ultrainfer::Runtime *clone_runtime) { + runtime_ = std::unique_ptr(clone_runtime); + return true; + } + + virtual std::unique_ptr Clone() { + FDERROR << ModelName() << " doesn't support Cone() now." 
<< std::endl; + return nullptr; + } + +protected: + virtual bool InitRuntime(); + + bool initialized = false; + // Reused input tensors + std::vector reused_input_tensors_; + // Reused output tensors + std::vector reused_output_tensors_; + +private: + bool InitRuntimeWithSpecifiedBackend(); + bool InitRuntimeWithSpecifiedDevice(); + bool CreateCpuBackend(); + bool CreateGpuBackend(); + bool CreateIpuBackend(); + bool CreateRKNPUBackend(); + bool CreateHorizonBackend(); + bool CreateSophgoNPUBackend(); + bool CreateTimVXBackend(); + bool CreateKunlunXinBackend(); + bool CreateASCENDBackend(); + bool CreateDirectMLBackend(); + bool IsSupported(const std::vector &backends, Backend backend); + + std::shared_ptr runtime_; + bool runtime_initialized_ = false; + // whether to record inference time + bool enable_record_time_of_runtime_ = false; + std::vector time_of_runtime_; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/axis_utils.h b/libs/ultrainfer/ultrainfer/utils/axis_utils.h new file mode 100755 index 0000000000..53a9aada1a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/axis_utils.h @@ -0,0 +1,52 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +namespace ultrainfer { + +static inline int CanonicalAxis(const int axis, const int rank) { + if (axis < 0) { + return axis + rank; + } + return axis; +} + +static inline int SizeToAxis(const int axis, const std::vector &dims) { + int size = 1; + for (int i = 0; i < axis; i++) { + size *= dims[i]; + } + return size; +} + +static inline int SizeFromAxis(const int axis, + const std::vector &dims) { + int size = 1; + for (int i = axis; i < dims.size(); i++) { + size *= dims[i]; + } + return size; +} + +static inline int SizeOutAxis(const int axis, + const std::vector &dims) { + int size = 1; + for (int i = axis + 1; i < dims.size(); i++) { + size *= dims[i]; + } + return size; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/path.h b/libs/ultrainfer/ultrainfer/utils/path.h new file mode 100755 index 0000000000..17f5c0d0cf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/path.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
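+// Lightweight path helpers used across UltraInfer, e.g.
+//   PathJoin("models", "infer.pdmodel") -> "models/infer.pdmodel" ("\\" separator on Windows).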
+ +#pragma once + +#include +#include +#include +#ifdef _MSC_VER +#define PATH_SEP "\\" +#else +#define PATH_SEP "/" +#endif + +namespace ultrainfer { + +inline std::string PathJoin(const std::vector &paths, + const std::string &sep = PATH_SEP) { + if (paths.size() == 1) { + return paths[0]; + } + std::string filepath = ""; + for (const auto &path : paths) { + if (filepath == "") { + filepath += path; + continue; + } + if (path[0] == sep[0] || filepath.back() == sep[0]) { + filepath += path; + } else { + filepath += sep + path; + } + } + return filepath; +} + +inline std::string PathJoin(const std::string &folder, + const std::string &filename, + const std::string &sep = PATH_SEP) { + return PathJoin(std::vector{folder, filename}, sep); +} + +inline std::string GetDirFromPath(const std::string &path) { + auto pos = path.find_last_of(PATH_SEP); + if (pos == std::string::npos) { + return ""; + } + // The root path in UNIX systems + if (pos == 0) { + return "/"; + } + return path.substr(0, pos); +} + +inline bool CheckFileExists(const std::string &path) { + std::fstream fin(path, std::ios::in); + if (!fin) { + return false; + } + return true; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/perf.h b/libs/ultrainfer/ultrainfer/utils/perf.h new file mode 100755 index 0000000000..0faabfcdc6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/perf.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/utils/utils.h" +#include // NOLINT + +namespace ultrainfer { + +class ULTRAINFER_DECL TimeCounter { +public: + void Start() { begin_ = std::chrono::system_clock::now(); } + + void End() { end_ = std::chrono::system_clock::now(); } + + double Duration() { + auto duration = + std::chrono::duration_cast(end_ - begin_); + return static_cast(duration.count()) * + std::chrono::microseconds::period::num / + std::chrono::microseconds::period::den; + } + + void PrintInfo(const std::string &prefix = "TimeCounter: ", + bool print_out = true) { + if (!print_out) { + return; + } + FDLogger() << prefix << " duration = " << Duration() << "s." << std::endl; + } + +private: + std::chrono::time_point begin_; + std::chrono::time_point end_; +}; + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/unique_ptr.h b/libs/ultrainfer/ultrainfer/utils/unique_ptr.h new file mode 100755 index 0000000000..9f02d5d792 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/unique_ptr.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +namespace ultrainfer { +namespace utils { +// Trait to select overloads and return types for MakeUnique. +template struct MakeUniqueResult { + using scalar = std::unique_ptr; +}; +template struct MakeUniqueResult { + using array = std::unique_ptr; +}; +template struct MakeUniqueResult { + using invalid = void; +}; + +// MakeUnique(...) is an early implementation of C++14 std::make_unique. +// It is designed to be 100% compatible with std::make_unique so that the +// eventual switchover will be a simple renaming operation. +template +typename MakeUniqueResult::scalar make_unique(Args &&...args) { // NOLINT + return std::unique_ptr( + new T(std::forward(args)...)); // NOLINT(build/c++11) +} + +// Overload for array of unknown bound. +// The allocation of arrays needs to use the array form of new, +// and cannot take element constructor arguments. +template +typename MakeUniqueResult::array make_unique(size_t n) { + return std::unique_ptr(new typename std::remove_extent::type[n]()); +} + +// Reject arrays of known bound. +template +typename MakeUniqueResult::invalid +make_unique(Args &&.../* args */) = delete; // NOLINT + +} // namespace utils +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/utils.cc b/libs/ultrainfer/ultrainfer/utils/utils.cc new file mode 100755 index 0000000000..71751c70f7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/utils.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/utils/utils.h" + +#include + +namespace ultrainfer { + +bool FDLogger::enable_info = true; +bool FDLogger::enable_warning = true; + +void SetLogger(bool enable_info, bool enable_warning) { + FDLogger::enable_info = enable_info; + FDLogger::enable_warning = enable_warning; +} + +FDLogger::FDLogger(bool verbose, const std::string &prefix) { + verbose_ = verbose; + line_ = ""; + prefix_ = prefix; +} + +FDLogger &FDLogger::operator<<(std::ostream &(*os)(std::ostream &)) { + if (!verbose_) { + return *this; + } + std::cout << prefix_ << " " << line_ << std::endl; + line_ = ""; + return *this; +} + +bool ReadBinaryFromFile(const std::string &file, std::string *contents) { + std::ifstream fin(file, std::ios::in | std::ios::binary); + if (!fin.is_open()) { + FDERROR << "Failed to open file: " << file << " to read." 
<< std::endl; + return false; + } + fin.seekg(0, std::ios::end); + contents->clear(); + contents->resize(fin.tellg()); + fin.seekg(0, std::ios::beg); + fin.read(&(contents->at(0)), contents->size()); + fin.close(); + return true; +} + +std::vector GetStride(const std::vector &dims) { + auto dims_size = dims.size(); + std::vector result(dims_size, 1); + for (int i = dims_size - 2; i >= 0; --i) { + result[i] = result[i + 1] * dims[i + 1]; + } + return result; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/utils/utils.h b/libs/ultrainfer/ultrainfer/utils/utils.h new file mode 100755 index 0000000000..b3af7fc371 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/utils/utils.h @@ -0,0 +1,234 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#if defined(_WIN32) +#ifdef ULTRAINFER_LIB +#define ULTRAINFER_DECL __declspec(dllexport) +#else +#define ULTRAINFER_DECL __declspec(dllimport) +#endif // ULTRAINFER_LIB +#else +#define ULTRAINFER_DECL __attribute__((visibility("default"))) +#endif // _WIN32 + +namespace ultrainfer { + +class ULTRAINFER_DECL FDLogger { +public: + static bool enable_info; + static bool enable_warning; + + FDLogger() { + line_ = ""; + prefix_ = "[UltraInfer]"; + verbose_ = true; + } + explicit FDLogger(bool verbose, const std::string &prefix = "[UltraInfer]"); + + template FDLogger &operator<<(const T &val) { + if (!verbose_) { + return *this; + } + std::stringstream ss; + ss << val; + line_ += ss.str(); + return *this; + } + + FDLogger &operator<<(std::ostream &(*os)(std::ostream &)); + + ~FDLogger() { + if (verbose_ && line_ != "") { + std::cout << line_ << std::endl; + } + } + +private: + std::string line_; + std::string prefix_; + bool verbose_ = true; +}; + +ULTRAINFER_DECL bool ReadBinaryFromFile(const std::string &file, + std::string *contents); + +#ifndef __REL_FILE__ +#define __REL_FILE__ __FILE__ +#endif + +#define FDERROR \ + FDLogger(true, "[ERROR]") \ + << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t" + +#define FDWARNING \ + FDLogger(ultrainfer::FDLogger::enable_warning, "[WARNING]") \ + << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t" + +#define FDINFO \ + FDLogger(ultrainfer::FDLogger::enable_info, "[INFO]") \ + << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t" + +#define FDASSERT(condition, format, ...) \ + if (!(condition)) { \ + int n = std::snprintf(nullptr, 0, format, ##__VA_ARGS__); \ + std::vector buffer(n + 1); \ + std::snprintf(buffer.data(), n + 1, format, ##__VA_ARGS__); \ + FDERROR << buffer.data() << std::endl; \ + std::abort(); \ + } + +///////// Basic Marco /////////// + +#define FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \ + case enum_type: { \ + using HINT = type; \ + __VA_ARGS__(); \ + break; \ + } + +#define FD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) 
\ + FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, data_t, __VA_ARGS__) + +// Visit different data type to match the corresponding function of FDTensor +#define FD_VISIT_ALL_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto &__dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::UINT8, uint8_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::BOOL, bool, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT32, int32_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT64, int64_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP32, float, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP64, double, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data " \ + "type BOOL, INT32, " \ + "INT64, FP32, FP64, but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ + }() + +#define FD_VISIT_INT_FLOAT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto &__dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT32, int32_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT64, int64_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP32, float, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP64, double, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::UINT8, uint8_t, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data type INT32, " \ + "INT64, FP32, FP64, UINT8 but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ + }() + +#define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto &__dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP32, float, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::FP64, double, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data type FP32, " \ + "FP64, but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ + }() + +#define FD_VISIT_INT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto &__dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT32, int32_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::INT64, int64_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::ultrainfer::FDDataType::UINT8, uint8_t, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. 
Expect to accept data type INT32, " \ + "INT64, UINT8 but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ + }() + +ULTRAINFER_DECL std::vector +GetStride(const std::vector &dims); + +template std::string Str(const std::vector &shape) { + std::ostringstream oss; + oss << "[ " << shape[0]; + for (size_t i = 1; i < shape.size(); ++i) { + oss << " ," << shape[i]; + } + oss << " ]"; + return oss.str(); +} + +/// Set behaviour of logging while using UltraInfer +ULTRAINFER_DECL void SetLogger(bool enable_info = true, + bool enable_warning = true); + +template +void CalculateStatisInfo(const void *src_ptr, int size, double *mean, + double *max, double *min) { + const T *ptr = static_cast(src_ptr); + *mean = static_cast(0); + *max = static_cast(-99999999); + *min = static_cast(99999999); + for (int i = 0; i < size; ++i) { + if (*(ptr + i) > *max) { + *max = *(ptr + i); + } + if (*(ptr + i) < *min) { + *min = *(ptr + i); + } + *mean += *(ptr + i); + } + *mean = *mean / size; +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision.h b/libs/ultrainfer/ultrainfer/vision.h new file mode 100755 index 0000000000..6c9a7ed69d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision.h @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
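A minimal sketch of how the FD_VISIT_* dispatch macros above are meant to be used: the `data_t` alias is injected by FD_PRIVATE_CASE_TYPE, while FDDataType is assumed to come from the core tensor headers that utils.h is used alongside:

  // Fill a raw buffer with ones, choosing the concrete element type at runtime.
  void FillOnes(void *buffer, int numel, ultrainfer::FDDataType dtype) {
    FD_VISIT_ALL_TYPES(dtype, "FillOnes", ([&] {
      data_t *ptr = static_cast<data_t *>(buffer);
      for (int i = 0; i < numel; ++i) {
        ptr[i] = static_cast<data_t>(1);
      }
    }));
  }

An unsupported dtype falls through to the default branch, where FDASSERT aborts with a formatted error message.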
+#pragma once + +#include "ultrainfer/core/config.h" +#ifdef ENABLE_VISION +#include "ultrainfer/vision/classification/contrib/resnet.h" +#include "ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h" +#include "ultrainfer/vision/classification/ppcls/model.h" +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_det.h" +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h" +#include "ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h" +#include "ultrainfer/vision/detection/contrib/nanodet_plus.h" +#include "ultrainfer/vision/detection/contrib/rknpu2/model.h" +#include "ultrainfer/vision/detection/contrib/scaledyolov4.h" +#include "ultrainfer/vision/detection/contrib/yolor.h" +#include "ultrainfer/vision/detection/contrib/yolov5/yolov5.h" +#include "ultrainfer/vision/detection/contrib/yolov5lite.h" +#include "ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h" +#include "ultrainfer/vision/detection/contrib/yolov6.h" +#include "ultrainfer/vision/detection/contrib/yolov7/yolov7.h" +#include "ultrainfer/vision/detection/contrib/yolov7end2end_ort.h" +#include "ultrainfer/vision/detection/contrib/yolov7end2end_trt.h" +#include "ultrainfer/vision/detection/contrib/yolov8/yolov8.h" +#include "ultrainfer/vision/detection/contrib/yolox.h" +#include "ultrainfer/vision/detection/ppdet/model.h" +#include "ultrainfer/vision/facealign/contrib/face_landmark_1000.h" +#include "ultrainfer/vision/facealign/contrib/pfld.h" +#include "ultrainfer/vision/facealign/contrib/pipnet.h" +#include "ultrainfer/vision/facedet/contrib/centerface/centerface.h" +#include "ultrainfer/vision/facedet/contrib/retinaface.h" +#include "ultrainfer/vision/facedet/contrib/scrfd.h" +#include "ultrainfer/vision/facedet/contrib/ultraface.h" +#include "ultrainfer/vision/facedet/contrib/yolov5face.h" +#include "ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h" +#include "ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h" +#include "ultrainfer/vision/faceid/contrib/adaface/adaface.h" +#include "ultrainfer/vision/faceid/contrib/insightface/model.h" +#include "ultrainfer/vision/generation/contrib/animegan.h" +#include "ultrainfer/vision/headpose/contrib/fsanet.h" +#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose.h" +#include "ultrainfer/vision/matting/contrib/modnet.h" +#include "ultrainfer/vision/matting/contrib/rvm.h" +#include "ultrainfer/vision/matting/ppmatting/ppmatting.h" +#include "ultrainfer/vision/ocr/ppocr/classifier.h" +#include "ultrainfer/vision/ocr/ppocr/dbcurvedetector.h" +#include "ultrainfer/vision/ocr/ppocr/dbdetector.h" +#include "ultrainfer/vision/ocr/ppocr/ppocr_v2.h" +#include "ultrainfer/vision/ocr/ppocr/ppocr_v3.h" +#include "ultrainfer/vision/ocr/ppocr/ppocr_v4.h" +#include "ultrainfer/vision/ocr/ppocr/ppstructurev2_layout.h" +#include "ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h" +#include "ultrainfer/vision/ocr/ppocr/recognizer.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_table.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" +#include "ultrainfer/vision/ocr/ppocr/uvdocwarpper.h" +#include "ultrainfer/vision/perception/paddle3d/caddn/caddn.h" +#include "ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h" +#include "ultrainfer/vision/perception/paddle3d/petr/petr.h" +#include "ultrainfer/vision/perception/paddle3d/smoke/smoke.h" +#include 
"ultrainfer/vision/segmentation/ppseg/model.h" +#include "ultrainfer/vision/sr/ppsr/model.h" +#include "ultrainfer/vision/tracking/pptracking/model.h" + +#endif + +#include "ultrainfer/vision/visualize/visualize.h" diff --git a/libs/ultrainfer/ultrainfer/vision/classification/classification_pybind.cc b/libs/ultrainfer/ultrainfer/vision/classification/classification_pybind.cc new file mode 100755 index 0000000000..413e4879c2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/classification_pybind.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindYOLOv5Cls(pybind11::module &m); +void BindPaddleClas(pybind11::module &m); +void BindPPShiTuV2(pybind11::module &m); +void BindResNet(pybind11::module &m); + +void BindClassification(pybind11::module &m) { + auto classification_module = + m.def_submodule("classification", "Image classification models."); + + BindYOLOv5Cls(classification_module); + BindPaddleClas(classification_module); + BindPPShiTuV2(classification_module); + BindResNet(classification_module); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.cc new file mode 100755 index 0000000000..465723ae04 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.cc @@ -0,0 +1,135 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/classification/contrib/resnet.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +ResNet::ResNet(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + // In constructor, the 3 steps below are necessary. + // 1. set the Backend 2. set RuntimeOption 3. 
call Initialize() + + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool ResNet::Initialize() { + // In this function, the 2 steps below are necessary. + // 1. assign values to the member variables 2. call InitRuntime() + + size = {224, 224}; + mean_vals = {0.485f, 0.456f, 0.406f}; + std_vals = {0.229f, 0.224f, 0.225f}; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool ResNet::Preprocess(Mat *mat, FDTensor *output) { + // In this function, the preprocessing needs to be implemented according to + // the original repo. + // The result of preprocessing has to be saved in an FDTensor, because the + // input of Infer() needs to be a std::vector of FDTensor. + // 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into the + // FDTensor variable. + + if (mat->Height() != size[0] || mat->Width() != size[1]) { + int interp = cv::INTER_LINEAR; + Resize::Run(mat, size[1], size[0], -1, -1, interp); + } + + BGR2RGB::Run(mat); + Normalize::Run(mat, mean_vals, std_vals); + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool ResNet::Postprocess(FDTensor &infer_result, ClassifyResult *result, + int topk) { + // In this function, the postprocessing needs to be implemented according to + // the original repo. + // Finally, the result of postprocessing should be saved in the ClassifyResult + // variable. + // 1. Softmax 2. Choose topk labels 3. Put the result into the ClassifyResult + // variable. + + int num_classes = infer_result.shape[1]; + function::Softmax(infer_result, &infer_result); + const float *infer_result_buffer = + reinterpret_cast(infer_result.Data()); + topk = std::min(num_classes, topk); + result->label_ids = + utils::TopKIndices(infer_result_buffer, num_classes, topk); + result->scores.resize(topk); + for (int i = 0; i < topk; ++i) { + result->scores[i] = *(infer_result_buffer + result->label_ids[i]); + } + return true; +} + +bool ResNet::Predict(cv::Mat *im, ClassifyResult *result, int topk) { + // In this function, Preprocess(), Infer(), and Postprocess() are called + // sequentially. + + Mat mat(*im); + std::vector processed_data(1); + if (!Preprocess(&mat, &(processed_data[0]))) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + processed_data[0].name = InputInfoOfRuntime(0).name; + + std::vector output_tensors; + if (!Infer(processed_data, &output_tensors)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + + if (!Postprocess(output_tensors[0], result, topk)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
<< std::endl; + return false; + } + + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.h b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.h new file mode 100755 index 0000000000..4fbf6c99d9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet.h @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +// The namespace should be +// ultrainfer::vision::classification (ultrainfer::vision::${task}) +namespace ultrainfer { +namespace vision { +/** \brief All object classification model APIs are defined inside this + * namespace + * + */ +namespace classification { +/*! @brief Torchvision ResNet series model + */ +class ULTRAINFER_DECL ResNet : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g. ./resnet50.onnx + * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use CPU, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + ResNet(const std::string &model_file, const std::string &params_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + virtual std::string ModelName() const { return "ResNet"; } + /** \brief Predict for the input "im", the result will be saved in "result". + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result Saving the inference result. + * \param[in] topk The length of return values, e.g., if topk==2, the result + * will include the 2 most probable class labels for the input image. + */ + virtual bool Predict(cv::Mat *im, ClassifyResult *result, int topk = 1); + /*! @brief + Argument for the image preprocessing step, tuple of (width, height), decides + the target size after resizing, default size = {224, 224} + */ + std::vector size; + /*! @brief + Mean parameters for normalization, size should be the same as the number of + channels, default mean_vals = {0.485f, 0.456f, 0.406f} + */ + std::vector mean_vals; + /*! @brief + Std parameters for normalization, size should be the same as the number of + channels, default std_vals = {0.229f, 0.224f, 0.225f} + */ + std::vector std_vals; + +private: + /*!
@brief Initialize for the ResNet model, assign values to the member variables + * and call InitRuntime() + */ + bool Initialize(); + /// Preprocessing for the input "mat", the result will be saved in "outputs". + bool Preprocess(Mat *mat, FDTensor *outputs); + /*! @brief Postprocessing for the input "infer_result", the result will be + * saved in "result". + */ + bool Postprocess(FDTensor &infer_result, ClassifyResult *result, + int topk = 1); +}; +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet_pybind.cc new file mode 100755 index 0000000000..8139ba57bd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/resnet_pybind.cc @@ -0,0 +1,39 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" +// namespace should be `ultrainfer` +namespace ultrainfer { +// the name of the Pybind function should be Bind${model_name} +void BindResNet(pybind11::module &m) { + // the constructor and the predict function are necessary + // the constructor is used to initialize the Python model class. + // the necessary public functions and variables like `size`, `mean_vals` + // should also be bound. + pybind11::class_(m, "ResNet") + .def(pybind11::init()) + .def("predict", + [](vision::classification::ResNet &self, pybind11::array &data, + int topk = 1) { + auto mat = PyArrayToCvMat(data); + vision::ClassifyResult res; + self.Predict(&mat, &res, topk); + return res; + }) + .def_readwrite("size", &vision::classification::ResNet::size) + .def_readwrite("mean_vals", &vision::classification::ResNet::mean_vals) + .def_readwrite("std_vals", &vision::classification::ResNet::std_vals); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.cc new file mode 100755 index 0000000000..c86a7b17c1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.cc @@ -0,0 +1,58 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
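A minimal C++ usage sketch for the ResNet wrapper declared and bound above; the model and image paths below are placeholders:

  auto model = ultrainfer::vision::classification::ResNet("resnet50.onnx");
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize." << std::endl;
    return -1;
  }
  cv::Mat im = cv::imread("test.jpg");
  ultrainfer::vision::ClassifyResult result;
  if (model.Predict(&im, &result, /*topk=*/5)) {
    // result.label_ids and result.scores hold the top-5 predictions.
  }

From Python, the `predict` binding above takes a NumPy image (e.g. from cv2.imread) and a topk value and returns the same ClassifyResult.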
+ +#include "ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +YOLOv5ClsPostprocessor::YOLOv5ClsPostprocessor() { topk_ = 1; } + +bool YOLOv5ClsPostprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = tensors[0].shape[0]; + FDTensor infer_result = tensors[0]; + FDTensor infer_result_softmax; + function::Softmax(infer_result, &infer_result_softmax, 1); + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + // output (1,1000) score classnum 1000 + int num_classes = infer_result_softmax.shape[1]; + const float *infer_result_buffer = + reinterpret_cast(infer_result_softmax.Data()) + + bs * infer_result_softmax.shape[1]; + topk_ = std::min(num_classes, topk_); + (*results)[bs].label_ids = + utils::TopKIndices(infer_result_buffer, num_classes, topk_); + (*results)[bs].scores.resize(topk_); + for (int i = 0; i < topk_; ++i) { + (*results)[bs].scores[i] = + *(infer_result_buffer + (*results)[bs].label_ids[i]); + } + + if ((*results)[bs].label_ids.size() == 0) { + return true; + } + } + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h new file mode 100755 index 0000000000..1ad4d5537b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h @@ -0,0 +1,55 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace classification { +/*! @brief Postprocessor object for YOLOv5Cls serials model. 
+ */ +class ULTRAINFER_DECL YOLOv5ClsPostprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv5Cls serials model + */ + YOLOv5ClsPostprocessor(); + + /** \brief Process the result of runtime and fill to ClassifyResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of classification + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set topk, default 1 + void SetTopK(const int &topk) { topk_ = topk; } + + /// Get topk, default 1 + float GetTopK() const { return topk_; } + +protected: + int topk_; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.cc new file mode 100755 index 0000000000..81f028e312 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.cc @@ -0,0 +1,91 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +YOLOv5ClsPreprocessor::YOLOv5ClsPreprocessor() { + size_ = {224, 224}; //{h,w} +} + +bool YOLOv5ClsPreprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // process after image load + double ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()), + static_cast(mat->Width())); + + // yolov5cls's preprocess steps + // 1. CenterCrop + // 2. 
Normalize + // CenterCrop + int crop_size = std::min(mat->Height(), mat->Width()); + CenterCrop::Run(mat, crop_size, crop_size); + Resize::Run(mat, size_[0], size_[1], -1, -1, cv::INTER_LINEAR); + // Normalize + BGR2RGB::Run(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + std::vector mean = {0.485f, 0.456f, 0.406f}; + std::vector std = {0.229f, 0.224f, 0.225f}; + NormalizeAndPermute::Run(mat, mean, std, false); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool YOLOv5ClsPreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h new file mode 100755 index 0000000000..9eaf06c9fd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace classification { +/*! @brief Preprocessor object for YOLOv5Cls serials model. 
+ */ +class ULTRAINFER_DECL YOLOv5ClsPreprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv5Cls serials model + */ + YOLOv5ClsPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {224, 224} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {224, 224} + std::vector GetSize() const { return size_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + // target size, tuple of (width, height), default size = {224, 224} + std::vector size_; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.cc new file mode 100755 index 0000000000..bed7fee5e8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +YOLOv5Cls::YOLOv5Cls(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv5Cls::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool YOLOv5Cls::Predict(const cv::Mat &im, ClassifyResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv5Cls::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h new file mode 100755 index 0000000000..7a3d3b52b9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/classification/contrib/yolov5cls/postprocessor.h" +#include "ultrainfer/vision/classification/contrib/yolov5cls/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace classification { +/*! @brief YOLOv5Cls model object used when to load a YOLOv5Cls model exported + * by YOLOv5Cls. + */ +class ULTRAINFER_DECL YOLOv5Cls : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g. ./yolov5cls.onnx + * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use CPU, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv5Cls(const std::string &model_file, const std::string &params_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov5cls"; } + + /** \brief Predict the classification result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output + * classification result will be written to this structure \return true if the + * prediction succeeded, otherwise false + */ + virtual bool Predict(const cv::Mat &img, ClassifyResult *result); + + /** \brief Predict the classification results for a batch of input images + * + * \param[in] imgs The input image list, each element comes from cv::imread() + * \param[in] results The output classification result list + * \return true if the prediction succeeded, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv5Cls + virtual YOLOv5ClsPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv5Cls + virtual YOLOv5ClsPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + YOLOv5ClsPreprocessor preprocessor_; + YOLOv5ClsPostprocessor postprocessor_; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls_pybind.cc b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls_pybind.cc new file mode 100755 index 0000000000..f61cfb20d7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/contrib/yolov5cls/yolov5cls_pybind.cc @@ -0,0 +1,108 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
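A minimal C++ usage sketch for the YOLOv5Cls API above; the file names are placeholders, and the top-k value is routed through the postprocessor as shown in the header:

  auto model = ultrainfer::vision::classification::YOLOv5Cls("yolov5n-cls.onnx");
  model.GetPostprocessor().SetTopK(5);
  std::vector<cv::Mat> images = {cv::imread("a.jpg"), cv::imread("b.jpg")};
  std::vector<ultrainfer::vision::ClassifyResult> results;
  if (!model.BatchPredict(images, &results)) {
    std::cerr << "Failed to run batch prediction." << std::endl;
  }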
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv5Cls(pybind11::module &m) { + pybind11::class_( + m, "YOLOv5ClsPreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::classification::YOLOv5ClsPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to preprocess the input data in " + "YOLOv5ClsPreprocessor.')"); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", + &vision::classification::YOLOv5ClsPreprocessor::GetSize, + &vision::classification::YOLOv5ClsPreprocessor::SetSize); + + pybind11::class_( + m, "YOLOv5ClsPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::classification::YOLOv5ClsPostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to postprocess the runtime result " + "in YOLOv5ClsPostprocessor.')"); + } + return results; + }) + .def("run", + [](vision::classification::YOLOv5ClsPostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to postprocess the runtime result " + "in YOLOv5ClsPostprocessor.')"); + } + return results; + }) + .def_property("topk", + &vision::classification::YOLOv5ClsPostprocessor::GetTopK, + &vision::classification::YOLOv5ClsPostprocessor::SetTopK); + + pybind11::class_( + m, "YOLOv5Cls") + .def(pybind11::init()) + .def("predict", + [](vision::classification::YOLOv5Cls &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::ClassifyResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::classification::YOLOv5Cls &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly( + "preprocessor", &vision::classification::YOLOv5Cls::GetPreprocessor) + .def_property_readonly( + "postprocessor", + &vision::classification::YOLOv5Cls::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.cc new file mode 100755 index 0000000000..060715523c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.cc @@ -0,0 +1,123 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/classification/ppcls/model.h" + +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +PaddleClasModel::PaddleClasModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + if (model_format == ModelFormat::PADDLE) { + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ipu_backends = {Backend::PDINFER}; + valid_directml_backends = {Backend::ORT}; + } else if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } else { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_directml_backends = {Backend::ORT}; + valid_horizon_backends = {Backend::HORIZONNPU}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +std::unique_ptr PaddleClasModel::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PaddleClasModel(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool PaddleClasModel::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool PaddleClasModel::Predict(cv::Mat *im, ClassifyResult *result, int topk) { + postprocessor_.SetTopk(topk); + if (!Predict(*im, result)) { + return false; + } + return true; +} + +bool PaddleClasModel::Predict(const cv::Mat &im, ClassifyResult *result) { + FDMat mat = WrapMat(im); + return Predict(mat, result); +} + +bool PaddleClasModel::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector mats = WrapMat(images); + return BatchPredict(mats, results); +} + +bool PaddleClasModel::Predict(const FDMat &mat, ClassifyResult *result) { + std::vector results; + std::vector mats = {mat}; + if (!BatchPredict(mats, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool PaddleClasModel::BatchPredict(const std::vector &mats, + std::vector *results) { + std::vector fd_mats = mats; + if (!preprocessor_.Run(&fd_mats, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." 
+ << std::endl; + return false; + } + + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.h b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.h new file mode 100755 index 0000000000..12e9574d02 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/model.h @@ -0,0 +1,128 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/classification/ppcls/postprocessor.h" +#include "ultrainfer/vision/classification/ppcls/preprocessor.h" + +namespace ultrainfer { +namespace vision { +/** \brief All classification model APIs are defined inside this namespace + * + */ +namespace classification { +/*! @brief PaddleClas serials model object used when to load a PaddleClas model + * exported by PaddleClas repository + */ +class ULTRAINFER_DECL PaddleClasModel : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * resnet/infer_cfg.yml \param[in] custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PaddleClasModel(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new PaddleClasModel with less memory usage when multiple + * instances of the same model are created + * + * \return new PaddleClasModel* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + virtual std::string ModelName() const { return "PaddleClas/Model"; } + + /** \brief DEPRECATED Predict the classification result for an input image, + * remove at 1.0 version + * + * \param[in] im The input image data, comes from cv::imread() + * \param[in] result The output classification result will be writen to this + * structure \return true if the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, ClassifyResult *result, int topk = 1); + + /** \brief Predict the classification result for an input image + * + * \param[in] img The input image data, comes from cv::imread() + * \param[in] result The output classification result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, ClassifyResult *result); + + /** \brief 
Predict the classification results for a batch of input images
+   *
+   * \param[in] imgs The input image list, each element comes from cv::imread()
+   * \param[in] results The output classification result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector<cv::Mat> &imgs,
+                            std::vector<ClassifyResult> *results);
+
+  /** \brief Predict the classification result for an input image
+   *
+   * \param[in] mat The input mat
+   * \param[in] result The output classification result
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(const FDMat &mat, ClassifyResult *result);
+
+  /** \brief Predict the classification results for a batch of input images
+   *
+   * \param[in] mats The input mat list
+   * \param[in] results The output classification result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector<FDMat> &mats,
+                            std::vector<ClassifyResult> *results);
+
+  /// Get preprocessor reference of PaddleClasModel
+  virtual PaddleClasPreprocessor &GetPreprocessor() { return preprocessor_; }
+
+  /// Get postprocessor reference of PaddleClasModel
+  virtual PaddleClasPostprocessor &GetPostprocessor() { return postprocessor_; }
+
+protected:
+  bool Initialize();
+  PaddleClasPreprocessor preprocessor_;
+  PaddleClasPostprocessor postprocessor_;
+};
+
+typedef PaddleClasModel PPLCNet;
+typedef PaddleClasModel PPLCNetv2;
+typedef PaddleClasModel EfficientNet;
+typedef PaddleClasModel GhostNet;
+typedef PaddleClasModel MobileNetv1;
+typedef PaddleClasModel MobileNetv2;
+typedef PaddleClasModel MobileNetv3;
+typedef PaddleClasModel ShuffleNetv2;
+typedef PaddleClasModel SqueezeNet;
+typedef PaddleClasModel Inceptionv3;
+typedef PaddleClasModel PPHGNet;
+typedef PaddleClasModel ResNet50vd;
+typedef PaddleClasModel SwinTransformer;
+} // namespace classification
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.cc
new file mode 100755
index 0000000000..03f5222453
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.cc
@@ -0,0 +1,57 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/vision/classification/ppcls/postprocessor.h"
+#include "ultrainfer/vision/utils/utils.h"
+
+namespace ultrainfer {
+namespace vision {
+namespace classification {
+
+PaddleClasPostprocessor::PaddleClasPostprocessor(int topk) {
+  topk_ = topk;
+  initialized_ = true;
+}
+
+bool PaddleClasPostprocessor::Run(const std::vector<FDTensor> &infer_result,
+                                  std::vector<ClassifyResult> *results) {
+  if (!initialized_) {
+    FDERROR << "Postprocessor is not initialized."
<< std::endl; + return false; + } + + int batch = infer_result[0].shape[0]; + int num_classes = infer_result[0].shape[1]; + const float *infer_result_data = + reinterpret_cast(infer_result[0].Data()); + + results->resize(batch); + + int topk = std::min(num_classes, topk_); + for (int i = 0; i < batch; ++i) { + (*results)[i].label_ids = utils::TopKIndices( + infer_result_data + i * num_classes, num_classes, topk); + (*results)[i].scores.resize(topk); + for (int j = 0; j < topk; ++j) { + (*results)[i].scores[j] = + infer_result_data[i * num_classes + (*results)[i].label_ids[j]]; + } + } + + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.h new file mode 100755 index 0000000000..fafcef58bc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/postprocessor.h @@ -0,0 +1,56 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace classification { +/*! @brief Postprocessor object for PaddleClas serials model. + */ +class ULTRAINFER_DECL PaddleClasPostprocessor { +public: + /** \brief Create a postprocessor instance for PaddleClas serials model + * + * \param[in] topk The topk result filtered by the classify confidence score, + * default 1 + */ + explicit PaddleClasPostprocessor(int topk = 1); + + /** \brief Process the result of runtime and fill to ClassifyResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of classification + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *result); + + /// Set topk value + void SetTopk(int topk) { topk_ = topk; } + + /// Get topk value + int GetTopk() const { return topk_; } + +private: + int topk_ = 1; + bool initialized_ = false; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/ppcls_pybind.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/ppcls_pybind.cc new file mode 100755 index 0000000000..b5826e8084 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/ppcls_pybind.cc @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPaddleClas(pybind11::module &m) { + pybind11::class_(m, "PaddleClasPreprocessor") + .def(pybind11::init()) + .def("disable_normalize", + [](vision::classification::PaddleClasPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::classification::PaddleClasPreprocessor &self) { + self.DisablePermute(); + }) + .def("initial_resize_on_cpu", + [](vision::classification::PaddleClasPreprocessor &self, bool v) { + self.InitialResizeOnCpu(v); + }); + + pybind11::class_( + m, "PaddleClasPostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::classification::PaddleClasPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleClasPostprocessor."); + } + return results; + }) + .def("run", + [](vision::classification::PaddleClasPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleClasPostprocessor."); + } + return results; + }) + .def_property("topk", + &vision::classification::PaddleClasPostprocessor::GetTopk, + &vision::classification::PaddleClasPostprocessor::SetTopk); + + pybind11::class_( + m, "PaddleClasModel") + .def(pybind11::init()) + .def("clone", + [](vision::classification::PaddleClasModel &self) { + return self.Clone(); + }) + .def("predict", + [](vision::classification::PaddleClasModel &self, + pybind11::array &data) { + cv::Mat im = PyArrayToCvMat(data); + vision::ClassifyResult result; + self.Predict(im, &result); + return result; + }) + .def("batch_predict", + [](vision::classification::PaddleClasModel &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly( + "preprocessor", + &vision::classification::PaddleClasModel::GetPreprocessor) + .def_property_readonly( + "postprocessor", + &vision::classification::PaddleClasModel::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.cc new file mode 100755 index 0000000000..eeece4461f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.cc @@ -0,0 +1,156 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/classification/ppcls/preprocessor.h" + +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +PaddleClasPreprocessor::PaddleClasPreprocessor(const std::string &config_file) { + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create PaddleClasPreprocessor."); + initialized_ = true; +} + +bool PaddleClasPreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + auto preprocess_cfg = cfg["PreProcess"]["transform_ops"]; + processors_.push_back(std::make_shared()); + for (const auto &op : preprocess_cfg) { + FDASSERT(op.IsMap(), + "Require the transform information in yaml be Map type."); + auto op_name = op.begin()->first.as(); + if (op_name == "ResizeImage") { + if (op.begin()->second["resize_short"]) { + int target_size = op.begin()->second["resize_short"].as(); + bool use_scale = false; + int interp = 1; + processors_.push_back( + std::make_shared(target_size, 1, use_scale)); + } else if (op.begin()->second["size"]) { + int width = 0; + int height = 0; + if (op.begin()->second["size"].IsScalar()) { + auto size = op.begin()->second["size"].as(); + width = size; + height = size; + } else { + auto size = op.begin()->second["size"].as>(); + width = size[0]; + height = size[1]; + } + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, 1, false)); + } else { + FDERROR << "Invalid params for ResizeImage for both 'size' and " + "'resize_short' are None" + << std::endl; + } + + } else if (op_name == "CropImage") { + int width = op.begin()->second["size"].as(); + int height = op.begin()->second["size"].as(); + processors_.push_back(std::make_shared(width, height)); + } else if (op_name == "NormalizeImage") { + if (!disable_normalize_) { + auto mean = op.begin()->second["mean"].as>(); + auto std = op.begin()->second["std"].as>(); + const auto &scale_origin = op.begin()->second["scale"]; + float scale; + if (scale_origin.as() == "1/255") { + scale = 1.0f / 255.0f; + } else { + scale = scale_origin.as(); + } + processors_.push_back(std::make_shared( + mean, std, true, std::vector(mean.size(), 0.0f), + std::vector(mean.size(), 1.0f / scale))); + } + } else if (op_name == "ToCHWImage") { + if (!disable_permute_) { + processors_.push_back(std::make_shared()); + } + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + + // Fusion will improve performance + FuseTransforms(&processors_); + return true; +} + +void PaddleClasPreprocessor::DisableNormalize() { + this->disable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." 
+ << std::endl; + } +} +void PaddleClasPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} + +bool PaddleClasPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." << std::endl; + return false; + } + for (size_t j = 0; j < processors_.size(); ++j) { + image_batch->proc_lib = proc_lib_; + if (initial_resize_on_cpu_ && j == 0 && + processors_[j]->Name().find("Resize") == 0) { + image_batch->proc_lib = ProcLib::OPENCV; + } + if (!(*(processors_[j].get()))(image_batch)) { + FDERROR << "Failed to processs image in " << processors_[j]->Name() << "." + << std::endl; + return false; + } + } + + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.h new file mode 100755 index 0000000000..97831cd44d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppcls/preprocessor.h @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace classification { +/*! @brief Preprocessor object for PaddleClas serials model. + */ +class ULTRAINFER_DECL PaddleClasPreprocessor : public ProcessorManager { +public: + /** \brief Create a preprocessor instance for PaddleClas serials model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * resnet/infer_cfg.yml + */ + explicit PaddleClasPreprocessor(const std::string &config_file); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// This function will disable normalize in preprocessing step. + void DisableNormalize(); + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute(); + + /** \brief When the initial operator is Resize, and input image size is large, + * maybe it's better to run resize on CPU, because the HostToDevice memcpy + * is time consuming. Set this true to run the initial resize on CPU. + * + * \param[in] v ture or false + */ + void InitialResizeOnCpu(bool v) { initial_resize_on_cpu_ = v; } + +private: + bool BuildPreprocessPipelineFromConfig(); + bool initialized_ = false; + std::vector> processors_; + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + // read config file + std::string config_file_; + bool initial_resize_on_cpu_ = false; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshitu_pybind.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshitu_pybind.cc new file mode 100755 index 0000000000..424921e290 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshitu_pybind.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPShiTuV2(pybind11::module &m) { + pybind11::class_(m, + "PPShiTuV2RecognizerPreprocessor") + .def(pybind11::init()) + .def("disable_normalize", + [](vision::classification::PPShiTuV2RecognizerPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::classification::PPShiTuV2RecognizerPreprocessor &self) { + self.DisablePermute(); + }) + .def("initial_resize_on_cpu", + [](vision::classification::PPShiTuV2RecognizerPreprocessor &self, + bool v) { self.InitialResizeOnCpu(v); }); + + pybind11::class_( + m, "PPShiTuV2RecognizerPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::classification::PPShiTuV2RecognizerPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PPShiTuV2RecognizerPostprocessor."); + } + return results; + }) + .def("run", + [](vision::classification::PPShiTuV2RecognizerPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PPShiTuV2RecognizerPostprocessor."); + } + return results; + }) + .def_property("feature_norm", + &vision::classification::PPShiTuV2RecognizerPostprocessor:: + GetFeatureNorm, + &vision::classification::PPShiTuV2RecognizerPostprocessor:: + SetFeatureNorm); + + pybind11::class_(m, "PPShiTuV2Recognizer") + .def(pybind11::init()) + .def("clone", + [](vision::classification::PPShiTuV2Recognizer &self) { + return self.Clone(); + }) + .def("predict", + 
[](vision::classification::PPShiTuV2Recognizer &self, + pybind11::array &data) { + cv::Mat im = PyArrayToCvMat(data); + vision::ClassifyResult result; + self.Predict(im, &result); + return result; + }) + .def("batch_predict", + [](vision::classification::PPShiTuV2Recognizer &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly( + "preprocessor", + &vision::classification::PPShiTuV2Recognizer::GetPreprocessor) + .def_property_readonly( + "postprocessor", + &vision::classification::PPShiTuV2Recognizer::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_det.h b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_det.h new file mode 100755 index 0000000000..fa3ba4bb42 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_det.h @@ -0,0 +1,25 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/vision/detection/ppdet/model.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +typedef detection::PicoDet PPShiTuV2Detector; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.cc new file mode 100755 index 0000000000..8ed2e3b882 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.cc @@ -0,0 +1,121 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
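+//
+// Illustrative usage (not part of this file): a minimal sketch of calling the
+// recognizer implemented below, assuming a PP-ShiTuV2 rec model exported to
+// the placeholder paths "rec/inference.pdmodel", "rec/inference.pdiparams"
+// and "rec/inference_cls.yaml"; only the class, method, and result-field
+// names are taken from this library.
+//
+//   ultrainfer::vision::classification::PPShiTuV2Recognizer rec(
+//       "rec/inference.pdmodel", "rec/inference.pdiparams",
+//       "rec/inference_cls.yaml");
+//   cv::Mat im = cv::imread("query.jpg");
+//   ultrainfer::vision::ClassifyResult res;
+//   if (rec.Predict(im, &res)) {
+//     // res.feature holds the extracted embedding (L2-normalized by default).
+//   }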
+ +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h" + +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +PPShiTuV2Recognizer::PPShiTuV2Recognizer(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + if (model_format == ModelFormat::PADDLE) { + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ipu_backends = {Backend::PDINFER}; + valid_directml_backends = {Backend::ORT}; + } else if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } else { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_directml_backends = {Backend::ORT}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +std::unique_ptr PPShiTuV2Recognizer::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PPShiTuV2Recognizer(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool PPShiTuV2Recognizer::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool PPShiTuV2Recognizer::Predict(cv::Mat *im, ClassifyResult *result) { + if (!Predict(*im, result)) { + return false; + } + return true; +} + +bool PPShiTuV2Recognizer::Predict(const cv::Mat &im, ClassifyResult *result) { + FDMat mat = WrapMat(im); + return Predict(mat, result); +} + +bool PPShiTuV2Recognizer::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector mats = WrapMat(images); + return BatchPredict(mats, results); +} + +bool PPShiTuV2Recognizer::Predict(const FDMat &mat, ClassifyResult *result) { + std::vector results; + std::vector mats = {mat}; + if (!BatchPredict(mats, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool PPShiTuV2Recognizer::BatchPredict(const std::vector &mats, + std::vector *results) { + std::vector fd_mats = mats; + if (!preprocessor_.Run(&fd_mats, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." 
+ << std::endl; + return false; + } + + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h new file mode 100755 index 0000000000..77190d22fc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec.h @@ -0,0 +1,117 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h" +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace classification { +/*! @brief PPShiTuV2Recognizer model object used when to load a + * PPShiTuV2Recognizer model exported by PP-ShiTuV2 Rec model. + */ +class ULTRAINFER_DECL PPShiTuV2Recognizer : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g PPLCNet/inference.pdmodel + * \param[in] params_file Path of parameter file, e.g + * PPLCNet/inference.pdiparams, if the model format is ONNX, this parameter + * will be ignored \param[in] config_file Path of configuration file for + * deployment, e.g PPLCNet/inference_cls.yml \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends` \param[in] model_format Model + * format of the loaded model, default is Paddle format + */ + PPShiTuV2Recognizer(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new PPShiTuV2Recognizer with less memory usage when + * multiple instances of the same model are created + * + * \return new PPShiTuV2Recognizer* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + virtual std::string ModelName() const { return "PPShiTuV2Recognizer"; } + + /** \brief DEPRECATED Predict the feature vector result for an input image, + * remove at 1.0 version + * + * \param[in] im The input image data, comes from cv::imread() + * \param[in] result The output feature vector result will be writen to this + * structure \return true if the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, ClassifyResult *result); + + /** \brief Predict the classification result for an input image + * + * \param[in] img The input image data, comes from cv::imread() + * \param[in] result The output feature vector result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, ClassifyResult *result); + + 
/** \brief Predict the feature vector results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output feature vector(namely ClassifyResult.feature) + * result list \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /** \brief Predict the feature vector result for an input image + * + * \param[in] mat The input mat + * \param[in] result The output feature vector result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const FDMat &mat, ClassifyResult *result); + + /** \brief Predict the feature vector results for a batch of input images + * + * \param[in] mats, The input mat list + * \param[in] results The output feature vector result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &mats, + std::vector *results); + + /// Get preprocessor reference of PPShiTuV2Recognizer + virtual PPShiTuV2RecognizerPreprocessor &GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference of PPShiTuV2Recognizer + virtual PPShiTuV2RecognizerPostprocessor &GetPostprocessor() { + return postprocessor_; + } + +protected: + bool Initialize(); + PPShiTuV2RecognizerPreprocessor preprocessor_; + PPShiTuV2RecognizerPostprocessor postprocessor_; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.cc new file mode 100755 index 0000000000..fa36ee5815 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.cc @@ -0,0 +1,58 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
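+//
+// The Run() below copies each row of the [batch, feature_dim] output tensor
+// into ClassifyResult.feature and, when feature_norm_ is enabled (the
+// default), L2-normalizes it in place. A standalone sketch of that
+// normalization (variable names here are illustrative only):
+//
+//   std::vector<float> feat = /* one embedding row */;
+//   float norm = std::sqrt(
+//       std::inner_product(feat.begin(), feat.end(), feat.begin(), 0.0f));
+//   for (auto &v : feat) v /= norm;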
+
+#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h"
+#include "ultrainfer/vision/utils/utils.h"
+#include <cmath>
+#include <numeric>
+
+namespace ultrainfer {
+namespace vision {
+namespace classification {
+
+bool PPShiTuV2RecognizerPostprocessor::Run(
+    const std::vector<FDTensor> &tensors,
+    std::vector<ClassifyResult> *results) {
+  int batch = tensors[0].shape[0]; // e.g. [batch, 512]
+  int num_feature = tensors[0].shape[1];
+  const float *tensor_data =
+      reinterpret_cast<const float *>(tensors[0].Data());
+
+  results->resize(batch);
+
+  // post processing per batch=1
+  for (int i = 0; i < batch; ++i) {
+    (*results)[i].feature.resize(num_feature);
+    const float *tensor_data_i_start = tensor_data + i * num_feature;
+    std::memcpy((*results)[i].feature.data(), tensor_data_i_start,
+                num_feature * sizeof(float));
+    if (feature_norm_) {
+      FeatureNorm((*results)[i].feature);
+    }
+  }
+
+  return true;
+}
+
+void PPShiTuV2RecognizerPostprocessor::FeatureNorm(
+    std::vector<float> &feature) {
+  float feature_sqrt = std::sqrt(std::inner_product(
+      feature.begin(), feature.end(), feature.begin(), 0.0f));
+  for (size_t i = 0; i < feature.size(); ++i) {
+    feature[i] /= feature_sqrt;
+  }
+}
+
+} // namespace classification
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h
new file mode 100755
index 0000000000..57e50d69a1
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_postprocessor.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+namespace vision {
+
+namespace classification {
+/*! @brief Postprocessor object for PP-ShiTuV2 Recognizer model.
+ */
+class ULTRAINFER_DECL PPShiTuV2RecognizerPostprocessor {
+public:
+  PPShiTuV2RecognizerPostprocessor() = default;
+
+  /** \brief Process the result of runtime and fill to ClassifyResult structure
+   *
+   * \param[in] tensors The inference result from runtime
+   * \param[in] results The output feature vector results (see the
+   * ClassifyResult.feature member)
+   * \return true if the postprocess succeeded, otherwise false
+   */
+  bool Run(const std::vector<FDTensor> &tensors,
+           std::vector<ClassifyResult> *results);
+  /// Set the value of feature_norm_ for Postprocessor
+  void SetFeatureNorm(bool feature_norm) { feature_norm_ = feature_norm; }
+  /// Get the value of feature_norm_ from Postprocessor, defaults to true.
+ bool GetFeatureNorm() { return feature_norm_; } + +private: + void FeatureNorm(std::vector &feature); + bool feature_norm_ = true; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.cc new file mode 100755 index 0000000000..9bf42dba1d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.cc @@ -0,0 +1,160 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h" + +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +PPShiTuV2RecognizerPreprocessor::PPShiTuV2RecognizerPreprocessor( + const std::string &config_file) { + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create PPShiTuV2RecognizerPreprocessor."); + initialized_ = true; +} + +bool PPShiTuV2RecognizerPreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + auto preprocess_cfg = cfg["PreProcess"]["transform_ops"]; + // Outdated: + // We use the key 'RecPreProcess' to denote the preprocess + // operators for PP-ShiTuV2 recognizer. 
+ // auto preprocess_cfg = cfg["RecPreProcess"]["transform_ops"]; + processors_.push_back(std::make_shared()); + for (const auto &op : preprocess_cfg) { + FDASSERT(op.IsMap(), + "Require the transform information in yaml be Map type."); + auto op_name = op.begin()->first.as(); + if (op_name == "ResizeImage") { + if (op.begin()->second["resize_short"]) { + int target_size = op.begin()->second["resize_short"].as(); + bool use_scale = false; + int interp = 1; + processors_.push_back( + std::make_shared(target_size, 1, use_scale)); + } else if (op.begin()->second["size"]) { + int width = 0; + int height = 0; + if (op.begin()->second["size"].IsScalar()) { + auto size = op.begin()->second["size"].as(); + width = size; + height = size; + } else { + auto size = op.begin()->second["size"].as>(); + width = size[0]; + height = size[1]; + } + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, 1, false)); + } else { + FDERROR << "Invalid params for ResizeImage for both 'size' and " + "'resize_short' are None" + << std::endl; + } + + } else if (op_name == "CropImage") { + int width = op.begin()->second["size"].as(); + int height = op.begin()->second["size"].as(); + processors_.push_back(std::make_shared(width, height)); + } else if (op_name == "NormalizeImage") { + if (!disable_normalize_) { + auto mean = op.begin()->second["mean"].as>(); + auto std = op.begin()->second["std"].as>(); + const auto &scale_origin = op.begin()->second["scale"]; + float scale; + if (scale_origin.as() == "1/255") { + scale = 1.0f / 255.0f; + } else { + scale = scale_origin.as(); + } + processors_.push_back(std::make_shared( + mean, std, true, std::vector(mean.size(), 0.0f), + std::vector(mean.size(), 1.0f / scale))); + } + } else if (op_name == "ToCHWImage") { + if (!disable_permute_) { + processors_.push_back(std::make_shared()); + } + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + + // Fusion will improve performance + FuseTransforms(&processors_); + return true; +} + +void PPShiTuV2RecognizerPreprocessor::DisableNormalize() { + this->disable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} +void PPShiTuV2RecognizerPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} + +bool PPShiTuV2RecognizerPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." << std::endl; + return false; + } + for (size_t j = 0; j < processors_.size(); ++j) { + image_batch->proc_lib = proc_lib_; + if (initial_resize_on_cpu_ && j == 0 && + processors_[j]->Name().find("Resize") == 0) { + image_batch->proc_lib = ProcLib::OPENCV; + } + if (!(*(processors_[j].get()))(image_batch)) { + FDERROR << "Failed to processs image in " << processors_[j]->Name() << "." 
+ << std::endl; + return false; + } + } + + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h new file mode 100755 index 0000000000..5dcee60d48 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/classification/ppshitu/ppshituv2_rec_preprocessor.h @@ -0,0 +1,73 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace classification { + +/*! @brief Preprocessor object for PP-ShiTuV2 Recognizer model. + */ +class ULTRAINFER_DECL PPShiTuV2RecognizerPreprocessor + : public ProcessorManager { +public: + /** \brief Create a preprocessor instance for PP-ShiTuV2 Recognizer model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * PPLCNet/infer_cfg.yml + */ + explicit PPShiTuV2RecognizerPreprocessor(const std::string &config_file); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// This function will disable normalize in preprocessing step. + void DisableNormalize(); + /// This function will disable hwc2chw in preprocessing step. + void DisablePermute(); + + /** \brief When the initial operator is Resize, and input image size is large, + * maybe it's better to run resize on CPU, because the HostToDevice memcpy + * is time consuming. Set this true to run the initial resize on CPU. 
+ * + * \param[in] v ture or false + */ + void InitialResizeOnCpu(bool v) { initial_resize_on_cpu_ = v; } + +private: + bool BuildPreprocessPipelineFromConfig(); + bool initialized_ = false; + std::vector> processors_; + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + // read config file + std::string config_file_; + bool initial_resize_on_cpu_ = false; +}; + +} // namespace classification +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.cc b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.cc new file mode 100755 index 0000000000..194b3ced2d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/image_decoder/image_decoder.h" + +#include "opencv2/imgcodecs.hpp" + +namespace ultrainfer { +namespace vision { + +ImageDecoder::ImageDecoder(ImageDecoderLib lib) { + if (lib == ImageDecoderLib::NVJPEG) { +#ifdef ENABLE_NVJPEG + nvjpeg::init_decoder(nvjpeg_params_); +#endif + } + lib_ = lib; +} + +ImageDecoder::~ImageDecoder() { + if (lib_ == ImageDecoderLib::NVJPEG) { +#ifdef ENABLE_NVJPEG + nvjpeg::destroy_decoder(nvjpeg_params_); +#endif + } +} + +bool ImageDecoder::Decode(const std::string &img_name, FDMat *mat) { + std::vector mats(1); + mats[0] = std::move(*mat); + if (!BatchDecode({img_name}, &mats)) { + return false; + } + *mat = std::move(mats[0]); + return true; +} + +bool ImageDecoder::BatchDecode(const std::vector &img_names, + std::vector *mats) { + if (lib_ == ImageDecoderLib::OPENCV) { + return ImplByOpenCV(img_names, mats); + } else if (lib_ == ImageDecoderLib::NVJPEG) { + return ImplByNvJpeg(img_names, mats); + } + return true; +} + +bool ImageDecoder::ImplByOpenCV(const std::vector &img_names, + std::vector *mats) { + for (size_t i = 0; i < img_names.size(); ++i) { + cv::Mat im = cv::imread(img_names[i]); + (*mats)[i].SetMat(im); + (*mats)[i].layout = Layout::HWC; + (*mats)[i].SetWidth(im.cols); + (*mats)[i].SetHeight(im.rows); + (*mats)[i].SetChannels(im.channels()); + } + return true; +} + +bool ImageDecoder::ImplByNvJpeg(const std::vector &img_names, + std::vector *mats) { +#ifdef ENABLE_NVJPEG + nvjpeg_params_.batch_size = img_names.size(); + std::vector output_imgs(nvjpeg_params_.batch_size); + std::vector widths(nvjpeg_params_.batch_size); + std::vector heights(nvjpeg_params_.batch_size); + // TODO(wangxinyu): support other output format + nvjpeg_params_.fmt = NVJPEG_OUTPUT_BGRI; + double total; + nvjpeg_params_.stream = (*mats)[0].Stream(); + + std::vector output_buffers; + for (size_t i = 0; i < mats->size(); ++i) { + FDASSERT((*mats)[i].output_cache != nullptr, + "The output_cache of FDMat was not set."); + 
output_buffers.push_back((*mats)[i].output_cache); + } + + if (nvjpeg::process_images(img_names, nvjpeg_params_, total, output_imgs, + output_buffers, widths, heights)) { + // If nvJPEG decode failed, will fallback to OpenCV, + // e.g. png format is not supported by nvJPEG + FDWARNING << "nvJPEG decode failed, falling back to OpenCV for this batch" + << std::endl; + return ImplByOpenCV(img_names, mats); + } + + for (size_t i = 0; i < mats->size(); ++i) { + (*mats)[i].mat_type = ProcLib::CUDA; + (*mats)[i].layout = Layout::HWC; + (*mats)[i].SetTensor(output_buffers[i]); + } +#else + FDASSERT(false, "UltraInfer didn't compile with NVJPEG."); +#endif + return true; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.h b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.h new file mode 100755 index 0000000000..dbd2342900 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/image_decoder.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h" +#include "ultrainfer/vision/common/processors/mat.h" + +namespace ultrainfer { +namespace vision { + +enum class ULTRAINFER_DECL ImageDecoderLib { OPENCV, NVJPEG }; + +class ULTRAINFER_DECL ImageDecoder { +public: + explicit ImageDecoder(ImageDecoderLib lib = ImageDecoderLib::OPENCV); + + ~ImageDecoder(); + + bool Decode(const std::string &img_name, FDMat *mat); + + bool BatchDecode(const std::vector &img_names, + std::vector *mats); + +private: + bool ImplByOpenCV(const std::vector &img_names, + std::vector *mats); + bool ImplByNvJpeg(const std::vector &img_names, + std::vector *mats); + ImageDecoderLib lib_ = ImageDecoderLib::OPENCV; +#ifdef ENABLE_NVJPEG + nvjpeg::decode_params_t nvjpeg_params_; +#endif +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.cc b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.cc new file mode 100755 index 0000000000..9181758e75 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.cc @@ -0,0 +1,364 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
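+//
+// Context: the routines in this file back ImageDecoder when it is constructed
+// with ImageDecoderLib::NVJPEG (see image_decoder.cc above); callers normally
+// go through that high-level API rather than these helpers. A minimal sketch,
+// with the image path as a placeholder:
+//
+//   ultrainfer::vision::ImageDecoder decoder(
+//       ultrainfer::vision::ImageDecoderLib::OPENCV);  // or NVJPEG on GPU builds
+//   ultrainfer::vision::FDMat mat;
+//   if (decoder.Decode("demo.jpg", &mat)) {
+//     // mat now wraps the decoded HWC image.
+//   }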
+// +// Part of the following code in this file refs to +// https://github.com/CVCUDA/CV-CUDA/blob/release_v0.2.x/samples/common/NvDecoder.cpp +// +// Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Licensed under the Apache-2.0 license +// \brief +// \author NVIDIA + +#ifdef ENABLE_NVJPEG +#include "ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h" + +namespace ultrainfer { +namespace vision { +namespace nvjpeg { + +#define CHECK_CUDA(call) \ + { \ + cudaError_t _e = (call); \ + if (_e != cudaSuccess) { \ + std::cout << "CUDA Runtime failure: '#" << _e << "' at " << __FILE__ \ + << ":" << __LINE__ << std::endl; \ + exit(1); \ + } \ + } + +#define CHECK_NVJPEG(call) \ + { \ + nvjpegStatus_t _e = (call); \ + if (_e != NVJPEG_STATUS_SUCCESS) { \ + std::cout << "NVJPEG failure: '#" << _e << "' at " << __FILE__ << ":" \ + << __LINE__ << std::endl; \ + exit(1); \ + } \ + } + +static int dev_malloc(void **p, size_t s) { return (int)cudaMalloc(p, s); } + +static int dev_free(void *p) { return (int)cudaFree(p); } + +static int host_malloc(void **p, size_t s, unsigned int f) { + return (int)cudaHostAlloc(p, s, f); +} + +static int host_free(void *p) { return (int)cudaFreeHost(p); } + +static int read_images(const FileNames &image_names, FileData &raw_data, + std::vector &raw_len) { + for (size_t i = 0; i < image_names.size(); ++i) { + if (image_names.size() == 0) { + std::cerr << "No valid images left in the input list, exit" << std::endl; + return EXIT_FAILURE; + } + + // Read an image from disk. + std::ifstream input(image_names[i].c_str(), + std::ios::in | std::ios::binary | std::ios::ate); + if (!(input.is_open())) { + std::cerr << "Cannot open image: " << image_names[i] << std::endl; + FDASSERT(false, "Read file error."); + continue; + } + + // Get the size + long unsigned int file_size = input.tellg(); + input.seekg(0, std::ios::beg); + // resize if buffer is too small + if (raw_data[i].size() < file_size) { + raw_data[i].resize(file_size); + } + if (!input.read(raw_data[i].data(), file_size)) { + std::cerr << "Cannot read from file: " << image_names[i] << std::endl; + // image_names.erase(cur_iter); + FDASSERT(false, "Read file error."); + continue; + } + raw_len[i] = file_size; + } + return EXIT_SUCCESS; +} + +// prepare buffers for RGBi output format +static int prepare_buffers(FileData &file_data, std::vector &file_len, + std::vector &img_width, + std::vector &img_height, + std::vector &ibuf, + std::vector &isz, + std::vector &output_buffers, + const FileNames ¤t_names, + decode_params_t ¶ms) { + int widths[NVJPEG_MAX_COMPONENT]; + int heights[NVJPEG_MAX_COMPONENT]; + int channels; + nvjpegChromaSubsampling_t subsampling; + + for (long unsigned int i = 0; i < file_data.size(); i++) { + nvjpegStatus_t status = nvjpegGetImageInfo( + params.nvjpeg_handle, (unsigned char *)file_data[i].data(), file_len[i], + &channels, &subsampling, widths, heights); + if (status != NVJPEG_STATUS_SUCCESS) { + std::cout << "NVJPEG failure: #" << status << " in nvjpegGetImageInfo." 
+ << std::endl; + return EXIT_FAILURE; + } + + img_width[i] = widths[0]; + img_height[i] = heights[0]; + + int mul = 1; + // in the case of interleaved RGB output, write only to single channel, but + // 3 samples at once + if (params.fmt == NVJPEG_OUTPUT_RGBI || params.fmt == NVJPEG_OUTPUT_BGRI) { + channels = 1; + mul = 3; + } else if (params.fmt == NVJPEG_OUTPUT_RGB || + params.fmt == NVJPEG_OUTPUT_BGR) { + // in the case of rgb create 3 buffers with sizes of original image + channels = 3; + widths[1] = widths[2] = widths[0]; + heights[1] = heights[2] = heights[0]; + } else { + FDASSERT(false, "Unsupport NVJPEG output format: %d", params.fmt); + } + + output_buffers[i]->Resize({heights[0], widths[0], mul * channels}, + FDDataType::UINT8, "output_cache", Device::GPU); + + uint8_t *cur_buffer = + reinterpret_cast(output_buffers[i]->Data()); + + // realloc output buffer if required + for (int c = 0; c < channels; c++) { + int aw = mul * widths[c]; + int ah = heights[c]; + size_t sz = aw * ah; + ibuf[i].pitch[c] = aw; + if (sz > isz[i].pitch[c]) { + ibuf[i].channel[c] = cur_buffer; + cur_buffer = cur_buffer + sz; + isz[i].pitch[c] = sz; + } + } + } + return EXIT_SUCCESS; +} + +static void create_decoupled_api_handles(decode_params_t ¶ms) { + CHECK_NVJPEG(nvjpegDecoderCreate(params.nvjpeg_handle, NVJPEG_BACKEND_DEFAULT, + ¶ms.nvjpeg_decoder)); + CHECK_NVJPEG(nvjpegDecoderStateCreate(params.nvjpeg_handle, + params.nvjpeg_decoder, + ¶ms.nvjpeg_decoupled_state)); + + CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL, + ¶ms.pinned_buffers[0])); + CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL, + ¶ms.pinned_buffers[1])); + CHECK_NVJPEG(nvjpegBufferDeviceCreate(params.nvjpeg_handle, NULL, + ¶ms.device_buffer)); + + CHECK_NVJPEG( + nvjpegJpegStreamCreate(params.nvjpeg_handle, ¶ms.jpeg_streams[0])); + CHECK_NVJPEG( + nvjpegJpegStreamCreate(params.nvjpeg_handle, ¶ms.jpeg_streams[1])); + + CHECK_NVJPEG(nvjpegDecodeParamsCreate(params.nvjpeg_handle, + ¶ms.nvjpeg_decode_params)); +} + +static void destroy_decoupled_api_handles(decode_params_t ¶ms) { + CHECK_NVJPEG(nvjpegDecodeParamsDestroy(params.nvjpeg_decode_params)); + CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[0])); + CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[1])); + CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[0])); + CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[1])); + CHECK_NVJPEG(nvjpegBufferDeviceDestroy(params.device_buffer)); + CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_decoupled_state)); + CHECK_NVJPEG(nvjpegDecoderDestroy(params.nvjpeg_decoder)); +} + +int decode_images(const FileData &img_data, const std::vector &img_len, + std::vector &out, decode_params_t ¶ms, + double &time) { + CHECK_CUDA(cudaStreamSynchronize(params.stream)); + + std::vector batched_bitstreams; + std::vector batched_bitstreams_size; + std::vector batched_output; + + // bit-streams that batched decode cannot handle + std::vector otherdecode_bitstreams; + std::vector otherdecode_bitstreams_size; + std::vector otherdecode_output; + + if (params.hw_decode_available) { + for (int i = 0; i < params.batch_size; i++) { + // extract bitstream meta data to figure out whether a bit-stream can be + // decoded + nvjpegJpegStreamParseHeader(params.nvjpeg_handle, + (const unsigned char *)img_data[i].data(), + img_len[i], params.jpeg_streams[0]); + int isSupported = -1; + nvjpegDecodeBatchedSupported(params.nvjpeg_handle, params.jpeg_streams[0], + &isSupported); + + if (isSupported == 
0) { + batched_bitstreams.push_back((const unsigned char *)img_data[i].data()); + batched_bitstreams_size.push_back(img_len[i]); + batched_output.push_back(out[i]); + } else { + otherdecode_bitstreams.push_back( + (const unsigned char *)img_data[i].data()); + otherdecode_bitstreams_size.push_back(img_len[i]); + otherdecode_output.push_back(out[i]); + } + } + } else { + for (int i = 0; i < params.batch_size; i++) { + otherdecode_bitstreams.push_back( + (const unsigned char *)img_data[i].data()); + otherdecode_bitstreams_size.push_back(img_len[i]); + otherdecode_output.push_back(out[i]); + } + } + + if (batched_bitstreams.size() > 0) { + CHECK_NVJPEG(nvjpegDecodeBatchedInitialize( + params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.size(), 1, + params.fmt)); + + CHECK_NVJPEG(nvjpegDecodeBatched( + params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.data(), + batched_bitstreams_size.data(), batched_output.data(), params.stream)); + } + + if (otherdecode_bitstreams.size() > 0) { + CHECK_NVJPEG(nvjpegStateAttachDeviceBuffer(params.nvjpeg_decoupled_state, + params.device_buffer)); + int buffer_index = 0; + CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params, + params.fmt)); + for (int i = 0; i < params.batch_size; i++) { + CHECK_NVJPEG(nvjpegJpegStreamParse(params.nvjpeg_handle, + otherdecode_bitstreams[i], + otherdecode_bitstreams_size[i], 0, 0, + params.jpeg_streams[buffer_index])); + + CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer( + params.nvjpeg_decoupled_state, params.pinned_buffers[buffer_index])); + + CHECK_NVJPEG(nvjpegDecodeJpegHost( + params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, params.nvjpeg_decode_params, + params.jpeg_streams[buffer_index])); + + CHECK_CUDA(cudaStreamSynchronize(params.stream)); + + CHECK_NVJPEG(nvjpegDecodeJpegTransferToDevice( + params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, params.jpeg_streams[buffer_index], + params.stream)); + + buffer_index = 1 - buffer_index; // switch pinned buffer in pipeline mode + // to avoid an extra sync + + CHECK_NVJPEG( + nvjpegDecodeJpegDevice(params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, + &otherdecode_output[i], params.stream)); + } + } + return EXIT_SUCCESS; +} + +double process_images(const FileNames &image_names, decode_params_t ¶ms, + double &total, std::vector &iout, + std::vector &output_buffers, + std::vector &widths, std::vector &heights) { + FDASSERT(image_names.size() == params.batch_size, + "Number of images and batch size must be equal."); + // vector for storing raw files and file lengths + FileData file_data(params.batch_size); + std::vector file_len(params.batch_size); + FileNames current_names(params.batch_size); + // we wrap over image files to process total_images of files + auto file_iter = image_names.begin(); + + // output buffer sizes, for convenience + std::vector isz(params.batch_size); + + for (long unsigned int i = 0; i < iout.size(); i++) { + for (int c = 0; c < NVJPEG_MAX_COMPONENT; c++) { + iout[i].channel[c] = NULL; + iout[i].pitch[c] = 0; + isz[i].pitch[c] = 0; + } + } + + if (read_images(image_names, file_data, file_len)) { + return EXIT_FAILURE; + } + + if (prepare_buffers(file_data, file_len, widths, heights, iout, isz, + output_buffers, image_names, params)) { + return EXIT_FAILURE; + } + + double time; + if (decode_images(file_data, file_len, iout, params, time)) { + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +void init_decoder(decode_params_t ¶ms) 
{ + params.hw_decode_available = true; + nvjpegDevAllocator_t dev_allocator = {&dev_malloc, &dev_free}; + nvjpegPinnedAllocator_t pinned_allocator = {&host_malloc, &host_free}; + nvjpegStatus_t status = + nvjpegCreateEx(NVJPEG_BACKEND_HARDWARE, &dev_allocator, &pinned_allocator, + NVJPEG_FLAGS_DEFAULT, ¶ms.nvjpeg_handle); + if (status == NVJPEG_STATUS_ARCH_MISMATCH) { + std::cout << "Hardware Decoder not supported. " + "Falling back to default backend" + << std::endl; + CHECK_NVJPEG(nvjpegCreateEx(NVJPEG_BACKEND_DEFAULT, &dev_allocator, + &pinned_allocator, NVJPEG_FLAGS_DEFAULT, + ¶ms.nvjpeg_handle)); + params.hw_decode_available = false; + } else { + CHECK_NVJPEG(status); + } + + CHECK_NVJPEG( + nvjpegJpegStateCreate(params.nvjpeg_handle, ¶ms.nvjpeg_state)); + + create_decoupled_api_handles(params); +} + +void destroy_decoder(decode_params_t ¶ms) { + destroy_decoupled_api_handles(params); + CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_state)); + CHECK_NVJPEG(nvjpegDestroy(params.nvjpeg_handle)); +} + +} // namespace nvjpeg +} // namespace vision +} // namespace ultrainfer + +#endif // ENABLE_NVJPEG diff --git a/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h new file mode 100755 index 0000000000..14080f611a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/image_decoder/nvjpeg_decoder.h @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Part of the following code in this file refs to +// https://github.com/CVCUDA/CV-CUDA/blob/release_v0.2.x/samples/common/NvDecoder.h +// +// Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
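// Illustrative usage sketch (not part of this patch): how init_decoder /
// process_images / destroy_decoder are meant to be chained. The element types
// of iout / output_buffers / widths / heights are assumptions inferred from
// the implementation above (nvjpegImage_t, FDTensor*, int), and the file
// names are placeholders.
void DecodeBatchSketch() {
  using namespace ultrainfer::vision::nvjpeg;

  decode_params_t params;
  params.batch_size = 4;
  params.fmt = NVJPEG_OUTPUT_BGRI;   // interleaved BGR, single output plane
  cudaStreamCreate(&params.stream);

  init_decoder(params);              // tries NVJPEG_BACKEND_HARDWARE, falls back to default

  FileNames names = {"0.jpg", "1.jpg", "2.jpg", "3.jpg"};
  std::vector<nvjpegImage_t> iout(params.batch_size);
  std::vector<ultrainfer::FDTensor> caches(params.batch_size);
  std::vector<ultrainfer::FDTensor *> output_buffers;
  for (auto &t : caches) output_buffers.push_back(&t);
  std::vector<int> widths(params.batch_size), heights(params.batch_size);

  double total = 0.0;
  process_images(names, params, total, iout, output_buffers, widths, heights);
  // iout[i].channel[0] now points into output_buffers[i]'s GPU allocation.

  destroy_decoder(params);
  cudaStreamDestroy(params.stream);
}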
+// Licensed under the Apache-2.0 license +// \brief +// \author NVIDIA + +#pragma once + +#ifdef ENABLE_NVJPEG +#include "ultrainfer/core/fd_tensor.h" + +#include +#include + +namespace ultrainfer { +namespace vision { +namespace nvjpeg { + +typedef std::vector FileNames; +typedef std::vector> FileData; + +struct decode_params_t { + int batch_size; + nvjpegJpegState_t nvjpeg_state; + nvjpegHandle_t nvjpeg_handle; + cudaStream_t stream; + + // used with decoupled API + nvjpegJpegState_t nvjpeg_decoupled_state; + nvjpegBufferPinned_t pinned_buffers[2]; // 2 buffers for pipelining + nvjpegBufferDevice_t device_buffer; + nvjpegJpegStream_t jpeg_streams[2]; // 2 streams for pipelining + nvjpegDecodeParams_t nvjpeg_decode_params; + nvjpegJpegDecoder_t nvjpeg_decoder; + + nvjpegOutputFormat_t fmt; + bool hw_decode_available; +}; + +void init_decoder(decode_params_t ¶ms); +void destroy_decoder(decode_params_t ¶ms); + +double process_images(const FileNames &image_names, decode_params_t ¶ms, + double &total, std::vector &iout, + std::vector &output_buffers, + std::vector &widths, std::vector &heights); + +} // namespace nvjpeg +} // namespace vision +} // namespace ultrainfer + +#endif // ENABLE_NVJPEG diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/base.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/base.cc new file mode 100755 index 0000000000..4279d3cf8c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/base.cc @@ -0,0 +1,177 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/base.h" + +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/processors/proc_lib.h" + +namespace ultrainfer { +namespace vision { + +bool Processor::ImplByOpenCV(FDMat *mat) { + FDERROR << Name() << " Not Implement Yet." << std::endl; + return false; +} + +bool Processor::ImplByOpenCV(FDMatBatch *mat_batch) { + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + if (ImplByOpenCV(&(*(mat_batch->mats))[i]) != true) { + return false; + } + } + return true; +} + +bool Processor::ImplByFlyCV(FDMat *mat) { return ImplByOpenCV(mat); } + +bool Processor::ImplByFlyCV(FDMatBatch *mat_batch) { + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + if (ImplByFlyCV(&(*(mat_batch->mats))[i]) != true) { + return false; + } + } + return true; +} + +bool Processor::ImplByCuda(FDMat *mat) { + FDWARNING << Name() + << " is not implemented with CUDA, will fallback to OpenCV." + << std::endl; + return ImplByOpenCV(mat); +} + +bool Processor::ImplByCuda(FDMatBatch *mat_batch) { + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + if (ImplByCuda(&(*(mat_batch->mats))[i]) != true) { + return false; + } + } + return true; +} + +bool Processor::ImplByCvCuda(FDMat *mat) { + FDWARNING << Name() + << " is not implemented with CV-CUDA, will fallback to OpenCV." 
+ << std::endl; + return ImplByOpenCV(mat); +} + +bool Processor::ImplByCvCuda(FDMatBatch *mat_batch) { + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + if (ImplByCvCuda(&(*(mat_batch->mats))[i]) != true) { + return false; + } + } + return true; +} + +bool Processor::operator()(FDMat *mat) { + ProcLib target = mat->proc_lib; + if (mat->proc_lib == ProcLib::DEFAULT) { + target = DefaultProcLib::default_lib; + } + if (target == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + return ImplByFlyCV(mat); +#else + FDASSERT(false, "UltraInfer didn't compile with FlyCV."); +#endif + } else if (target == ProcLib::CUDA) { +#ifdef WITH_GPU + FDASSERT(mat->Stream() != nullptr, + "CUDA processor requires cuda stream, please set stream for Mat"); + return ImplByCuda(mat); +#else + FDASSERT(false, "UltraInfer didn't compile with WITH_GPU."); +#endif + } else if (target == ProcLib::CVCUDA) { +#ifdef ENABLE_CVCUDA + FDASSERT(mat->Stream() != nullptr, + "CV-CUDA requires cuda stream, please set stream for Mat"); + return ImplByCvCuda(mat); +#else + FDASSERT(false, "UltraInfer didn't compile with CV-CUDA."); +#endif + } + // DEFAULT & OPENCV + return ImplByOpenCV(mat); +} + +bool Processor::operator()(FDMat *mat, ProcLib lib) { + mat->proc_lib = lib; + return operator()(mat); +} + +bool Processor::operator()(FDMatBatch *mat_batch) { + ProcLib target = mat_batch->proc_lib; + if (mat_batch->proc_lib == ProcLib::DEFAULT) { + target = DefaultProcLib::default_lib; + } + if (target == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + return ImplByFlyCV(mat_batch); +#else + FDASSERT(false, "UltraInfer didn't compile with FlyCV."); +#endif + } else if (target == ProcLib::CUDA) { +#ifdef WITH_GPU + FDASSERT( + mat_batch->Stream() != nullptr, + "CUDA processor requires cuda stream, please set stream for mat_batch"); + return ImplByCuda(mat_batch); +#else + FDASSERT(false, "UltraInfer didn't compile with WITH_GPU."); +#endif + } else if (target == ProcLib::CVCUDA) { +#ifdef ENABLE_CVCUDA + FDASSERT(mat_batch->Stream() != nullptr, + "CV-CUDA processor requires cuda stream, please set stream for " + "mat_batch"); + return ImplByCvCuda(mat_batch); +#else + FDASSERT(false, "UltraInfer didn't compile with CV-CUDA."); +#endif + } + // DEFAULT & OPENCV + return ImplByOpenCV(mat_batch); +} + +void EnableFlyCV() { +#ifdef ENABLE_FLYCV + DefaultProcLib::default_lib = ProcLib::FLYCV; + FDINFO << "Will change to use image processing library " + << DefaultProcLib::default_lib << std::endl; +#else + FDWARNING << "UltraInfer didn't compile with FlyCV, " + "will fallback to use OpenCV instead." + << std::endl; +#endif +} + +void DisableFlyCV() { + DefaultProcLib::default_lib = ProcLib::OPENCV; + FDINFO << "Will change to use image processing library " + << DefaultProcLib::default_lib << std::endl; +} + +void SetProcLibCpuNumThreads(int threads) { + cv::setNumThreads(threads); +#ifdef ENABLE_FLYCV + fcv::set_thread_num(threads); +#endif +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/base.h b/libs/ultrainfer/ultrainfer/vision/common/processors/base.h new file mode 100755 index 0000000000..4a15e66733 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/base.h @@ -0,0 +1,93 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/processors/mat.h" +#include "ultrainfer/vision/common/processors/mat_batch.h" +#include + +namespace ultrainfer { +namespace vision { + +/*! @brief Enable using FlyCV to process image while deploy vision models. + * Currently, FlyCV in only available on ARM(Linux aarch64), so will + * fallback to using OpenCV in other platform + */ +ULTRAINFER_DECL void EnableFlyCV(); + +/// Disable using FlyCV to process image while deploy vision models. +ULTRAINFER_DECL void DisableFlyCV(); + +/*! @brief Set the cpu num threads of ProcLib. + */ +ULTRAINFER_DECL void SetProcLibCpuNumThreads(int threads); + +/*! @brief Processor base class for processors in + * ultrainfer/vision/common/processors + */ +class ULTRAINFER_DECL Processor { +public: + // default_lib has the highest priority + // all the function in `processor` will force to use + // default_lib if this flag is set. + // DEFAULT means this flag is not set + // static ProcLib default_lib; + + virtual std::string Name() = 0; + + virtual bool ImplByOpenCV(FDMat *mat); + virtual bool ImplByOpenCV(FDMatBatch *mat_batch); + + virtual bool ImplByFlyCV(FDMat *mat); + virtual bool ImplByFlyCV(FDMatBatch *mat_batch); + + virtual bool ImplByCuda(FDMat *mat); + virtual bool ImplByCuda(FDMatBatch *mat_batch); + + virtual bool ImplByCvCuda(FDMat *mat); + virtual bool ImplByCvCuda(FDMatBatch *mat_batch); + + /*! @brief operator `()` for calling processor in this way: `processor(mat)` + * + * \param[in] mat: The input mat + * \return true if the process successed, otherwise false + */ + virtual bool operator()(FDMat *mat); + + /*! @brief operator `()` for calling processor in this way: `processor(mat, + * lib)` This function is for backward compatibility, will be removed in the + * near future, please use operator()(FDMat* mat) instead and set proc_lib in + * mat. + * + * \param[in] mat: The input mat + * \param[in] lib: The processing library, opencv, cv-cuda, flycv, etc. + * \return true if the process successed, otherwise false + */ + virtual bool operator()(FDMat *mat, ProcLib lib); + + /*! @brief operator `()` for calling processor in this way: + * `processor(mat_batch)` + * + * \param[in] mat_batch: The input mat batch + * \return true if the process successed, otherwise false + */ + virtual bool operator()(FDMatBatch *mat_batch); +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/base_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/base_pybind.cc new file mode 100755 index 0000000000..0656fe951b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/base_pybind.cc @@ -0,0 +1,28 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
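// Illustrative sketch (not part of this patch): typical driver-side setup for
// the dispatch implemented above. Assumes `mat` is an FDMat already wrapping a
// decoded image (FDMat construction lives in mat.h, outside this hunk);
// BGR2RGB is one of the concrete processors added later in this patch.
ultrainfer::vision::SetProcLibCpuNumThreads(4);  // caps OpenCV / FlyCV CPU threads
ultrainfer::vision::EnableFlyCV();               // keeps OpenCV (with a warning) if ENABLE_FLYCV is off

ultrainfer::vision::BGR2RGB bgr2rgb;
bool ok = bgr2rgb(&mat);                                        // uses DefaultProcLib::default_lib
ok = ok && bgr2rgb(&mat, ultrainfer::vision::ProcLib::OPENCV);  // or force a specific backend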
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" +#include + +namespace ultrainfer { +void BindProcessor(pybind11::module &m) { + pybind11::class_(m, "Processor") + .def("__call__", [](vision::Processor &self, + vision::FDMat *mat) { return self(mat); }) + .def("__call__", + [](vision::Processor &self, vision::FDMatBatch *mat_batch) { + return self(mat_batch); + }); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/cast.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/cast.cc new file mode 100755 index 0000000000..12047588e7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/cast.cc @@ -0,0 +1,113 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/cast.h" + +#include "ultrainfer/vision/common/processors/utils.h" + +namespace ultrainfer { +namespace vision { + +bool Cast::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int c = im->channels(); + if (dtype_ == "float") { + if (im->type() != CV_32FC(c)) { + im->convertTo(*im, CV_32FC(c)); + } + } else if (dtype_ == "double") { + if (im->type() != CV_64FC(c)) { + im->convertTo(*im, CV_64FC(c)); + } + } else { + FDWARNING << "Cast not support for " << dtype_ + << " now! will skip this operation." << std::endl; + } + return true; +} + +#ifdef ENABLE_FLYCV +bool Cast::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (dtype_ == "float" && mat->Type() == FDDataType::FP32) { + return true; + } + if (dtype_ == "double" && mat->Type() == FDDataType::FP64) { + return true; + } + if (mat->layout != Layout::HWC) { + FDERROR + << "While using FlyCV to cast image, the image must be layout of HWC." + << std::endl; + return false; + } + if (dtype_ == "float") { + fcv::Mat new_im; + auto fcv_type = CreateFlyCVDataType(FDDataType::FP32, im->channels()); + im->convert_to(new_im, fcv_type); + mat->SetMat(new_im); + } else if (dtype_ == "double") { + fcv::Mat new_im; + auto fcv_type = CreateFlyCVDataType(FDDataType::FP64, im->channels()); + im->convert_to(new_im, fcv_type); + mat->SetMat(new_im); + } else { + FDWARNING << "Cast not support for " << dtype_ + << " now! will skip this operation." << std::endl; + } + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool Cast::ImplByCvCuda(FDMat *mat) { + FDDataType dst_dtype; + if (dtype_ == "float") { + dst_dtype = FDDataType::FP32; + } else if (dtype_ == "double") { + dst_dtype = FDDataType::FP64; + } else { + FDWARNING << "Cast not support for " << dtype_ + << " now! 
will skip this operation." << std::endl; + return false; + } + if (mat->Type() == dst_dtype) { + return true; + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + // Prepare output tensor + mat->output_cache->Resize(src->Shape(), dst_dtype, "output_cache", + Device::GPU); + auto dst_tensor = + CreateCvCudaTensorWrapData(*(mat->output_cache), mat->layout); + + cvcuda_convert_op_(mat->Stream(), *src_tensor, *dst_tensor, 1.0f, 0.0f); + + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool Cast::Run(Mat *mat, const std::string &dtype, ProcLib lib) { + auto c = Cast(dtype); + return c(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/cast.h b/libs/ultrainfer/ultrainfer/vision/common/processors/cast.h new file mode 100755 index 0000000000..eddc1c3de6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/cast.h @@ -0,0 +1,59 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for cast images with given type deafault is float. + */ +class ULTRAINFER_DECL Cast : public Processor { +public: + explicit Cast(const std::string &dtype = "float") : dtype_(dtype) {} + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "Cast"; } + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] dtype type of data will be casted to + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, const std::string &dtype, + ProcLib lib = ProcLib::DEFAULT); + + std::string GetDtype() const { return dtype_; } + +private: + std::string dtype_; +#ifdef ENABLE_CVCUDA + cvcuda::ConvertTo cvcuda_convert_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/cast_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/cast_pybind.cc new file mode 100755 index 0000000000..2b85572465 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/cast_pybind.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
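// Illustrative sketch (not part of this patch) of the two call styles for the
// Cast processor defined above; `mat` is assumed to be an existing FDMat.
ultrainfer::vision::Cast::Run(&mat, "float");   // one-shot: cast payload to float32
ultrainfer::vision::Cast cast("float");         // reusable form for a preprocessing pipeline
bool cast_ok = cast(&mat);                      // dtypes other than "float"/"double" are skipped with a warning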
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindCast(pybind11::module &m) { + pybind11::class_(m, "Cast").def( + pybind11::init(), "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.cc new file mode 100755 index 0000000000..dd9a3c569a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.cc @@ -0,0 +1,102 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/center_crop.h" + +namespace ultrainfer { +namespace vision { + +bool CenterCrop::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int height = static_cast(im->rows); + int width = static_cast(im->cols); + if (height < height_ || width < width_) { + FDERROR << "[CenterCrop] Image size less than crop size" << std::endl; + return false; + } + int offset_x = static_cast((width - width_) / 2); + int offset_y = static_cast((height - height_) / 2); + cv::Rect crop_roi(offset_x, offset_y, width_, height_); + cv::Mat new_im = (*im)(crop_roi).clone(); + mat->SetMat(new_im); + mat->SetWidth(width_); + mat->SetHeight(height_); + return true; +} + +#ifdef ENABLE_FLYCV +bool CenterCrop::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + int height = static_cast(im->height()); + int width = static_cast(im->width()); + if (height < height_ || width < width_) { + FDERROR << "[CenterCrop] Image size less than crop size" << std::endl; + return false; + } + int offset_x = static_cast((width - width_) / 2); + int offset_y = static_cast((height - height_) / 2); + fcv::Rect crop_roi(offset_x, offset_y, width_, height_); + fcv::Mat new_im; + fcv::crop(*im, new_im, crop_roi); + mat->SetMat(new_im); + mat->SetWidth(width_); + mat->SetHeight(height_); + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool CenterCrop::ImplByCvCuda(FDMat *mat) { + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + // Prepare output tensor + mat->output_cache->Resize({height_, width_, mat->Channels()}, src->Dtype(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + int offset_x = static_cast((mat->Width() - width_) / 2); + int offset_y = static_cast((mat->Height() - height_) / 2); + NVCVRectI crop_roi = {offset_x, offset_y, width_, height_}; + cvcuda_crop_op_(mat->Stream(), 
*src_tensor, *dst_tensor, crop_roi); + + mat->SetTensor(mat->output_cache); + mat->SetWidth(width_); + mat->SetHeight(height_); + mat->device = Device::GPU; + mat->mat_type = ProcLib::CVCUDA; + return true; +} + +bool CenterCrop::ImplByCvCuda(FDMatBatch *mat_batch) { + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + if (ImplByCvCuda(&((*(mat_batch->mats))[i])) != true) { + return false; + } + } + mat_batch->device = Device::GPU; + mat_batch->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool CenterCrop::Run(FDMat *mat, const int &width, const int &height, + ProcLib lib) { + auto c = CenterCrop(width, height); + return c(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.h b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.h new file mode 100755 index 0000000000..66ad486129 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop.h @@ -0,0 +1,63 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for crop images in center with given type deafault is + * float. + */ +class ULTRAINFER_DECL CenterCrop : public Processor { +public: + CenterCrop(int width, int height) : height_(height), width_(width) {} + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); + bool ImplByCvCuda(FDMatBatch *mat_batch); +#endif + std::string Name() { return "CenterCrop"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] width width of data will be croped to + * \param[in] height height of data will be croped to + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, const int &width, const int &height, + ProcLib lib = ProcLib::DEFAULT); + +private: + int height_; + int width_; +#ifdef ENABLE_CVCUDA + cvcuda::CustomCrop cvcuda_crop_op_; +#endif +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop_pybind.cc new file mode 100755 index 0000000000..f0a5b5e03f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/center_crop_pybind.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
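// Illustrative sketch (not part of this patch): center-cropping to a fixed
// network input size; `mat` is assumed to be an existing FDMat.
if (!ultrainfer::vision::CenterCrop::Run(&mat, 224, 224)) {
  // Returns false when the image is smaller than the requested crop.
}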
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindCenterCrop(pybind11::module &m) { + pybind11::class_(m, "CenterCrop") + .def(pybind11::init(), "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.cc new file mode 100755 index 0000000000..5bbeed56af --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.cc @@ -0,0 +1,133 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/color_space_convert.h" + +namespace ultrainfer { +namespace vision { +bool BGR2RGB::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat new_im; + cv::cvtColor(*im, new_im, cv::COLOR_BGR2RGB); + mat->SetMat(new_im); + return true; +} + +#ifdef ENABLE_FLYCV +bool BGR2RGB::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "[BGR2RGB] The channel of input image must be 3, but not it's " + << im->channels() << "." << std::endl; + return false; + } + fcv::Mat new_im; + fcv::cvt_color(*im, new_im, fcv::ColorConvertType::CVT_PA_BGR2PA_RGB); + mat->SetMat(new_im); + return true; +} +#endif + +bool RGB2BGR::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat new_im; + cv::cvtColor(*im, new_im, cv::COLOR_RGB2BGR); + mat->SetMat(new_im); + return true; +} + +#ifdef ENABLE_FLYCV +bool RGB2BGR::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "[RGB2BGR] The channel of input image must be 3, but not it's " + << im->channels() << "." << std::endl; + return false; + } + fcv::Mat new_im; + fcv::cvt_color(*im, new_im, fcv::ColorConvertType::CVT_PA_RGB2PA_BGR); + mat->SetMat(new_im); + return true; +} +#endif + +bool BGR2GRAY::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat new_im; + cv::cvtColor(*im, new_im, cv::COLOR_BGR2GRAY); + mat->SetMat(new_im); + mat->SetChannels(1); + return true; +} + +#ifdef ENABLE_FLYCV +bool BGR2GRAY::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "[BGR2GRAY] The channel of input image must be 3, but not it's " + << im->channels() << "." 
<< std::endl; + return false; + } + fcv::Mat new_im; + fcv::cvt_color(*im, new_im, fcv::ColorConvertType::CVT_PA_BGR2GRAY); + mat->SetMat(new_im); + return true; +} +#endif + +bool RGB2GRAY::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat new_im; + cv::cvtColor(*im, new_im, cv::COLOR_RGB2GRAY); + mat->SetMat(new_im); + return true; +} + +#ifdef ENABLE_FLYCV +bool RGB2GRAY::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "[RGB2GRAY] The channel of input image must be 3, but not it's " + << im->channels() << "." << std::endl; + return false; + } + fcv::Mat new_im; + fcv::cvt_color(*im, new_im, fcv::ColorConvertType::CVT_PA_RGB2GRAY); + mat->SetMat(new_im); + return true; +} +#endif + +bool BGR2RGB::Run(FDMat *mat, ProcLib lib) { + auto b = BGR2RGB(); + return b(mat, lib); +} + +bool RGB2BGR::Run(FDMat *mat, ProcLib lib) { + auto r = RGB2BGR(); + return r(mat, lib); +} + +bool BGR2GRAY::Run(FDMat *mat, ProcLib lib) { + auto b = BGR2GRAY(); + return b(mat, lib); +} + +bool RGB2GRAY::Run(FDMat *mat, ProcLib lib) { + auto r = RGB2GRAY(); + return r(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.h b/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.h new file mode 100755 index 0000000000..b908177364 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/color_space_convert.h @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for tansform images from BGR to RGB. + */ +class ULTRAINFER_DECL BGR2RGB : public Processor { +public: + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif + virtual std::string Name() { return "BGR2RGB"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, ProcLib lib = ProcLib::DEFAULT); +}; + +/*! @brief Processor for tansform images from RGB to BGR. + */ +class ULTRAINFER_DECL RGB2BGR : public Processor { +public: + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif + std::string Name() { return "RGB2BGR"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, ProcLib lib = ProcLib::DEFAULT); +}; + +/*! @brief Processor for tansform images from BGR to GRAY. 
+ */ +class ULTRAINFER_DECL BGR2GRAY : public Processor { +public: + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif + virtual std::string Name() { return "BGR2GRAY"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, ProcLib lib = ProcLib::DEFAULT); +}; + +/*! @brief Processor for tansform images from RGB to GRAY. + */ +class ULTRAINFER_DECL RGB2GRAY : public Processor { +public: + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif + std::string Name() { return "RGB2GRAY"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, ProcLib lib = ProcLib::DEFAULT); +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/convert.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/convert.cc new file mode 100755 index 0000000000..3353b77d18 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/convert.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
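// Illustrative sketch (not part of this patch) of the color-space converters
// declared above; `mat` is assumed to be an FDMat holding a 3-channel image.
ultrainfer::vision::BGR2RGB::Run(&mat);     // e.g. models trained on RGB, images decoded as BGR
// ultrainfer::vision::BGR2GRAY::Run(&mat); // gray-scale models; only the OpenCV path resets Channels() to 1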
+ +#include "ultrainfer/vision/common/processors/convert.h" + +namespace ultrainfer { + +namespace vision { + +Convert::Convert(const std::vector &alpha, + const std::vector &beta) { + FDASSERT(alpha.size() == beta.size(), + "Convert: requires the size of alpha equal to the size of beta."); + FDASSERT(alpha.size() != 0, + "Convert: requires the size of alpha and beta > 0."); + alpha_.assign(alpha.begin(), alpha.end()); + beta_.assign(beta.begin(), beta.end()); +} + +bool Convert::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + std::vector split_im; + cv::split(*im, split_im); + for (int c = 0; c < im->channels(); c++) { + split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); + } + cv::merge(split_im, *im); + return true; +} + +#ifdef ENABLE_FLYCV +bool Convert::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + FDASSERT(im->channels() == 3, "Only support 3-channels image in FlyCV."); + std::vector mean(3, 0); + std::vector std(3, 0); + for (size_t i = 0; i < 3; ++i) { + std[i] = 1.0 / alpha_[i]; + mean[i] = -1 * beta_[i] * std[i]; + } + fcv::Mat new_im; + fcv::normalize_to_submean_to_reorder(*im, mean, std, std::vector(), + new_im, true); + mat->SetMat(new_im); + return true; +} +#endif + +bool Convert::Run(Mat *mat, const std::vector &alpha, + const std::vector &beta, ProcLib lib) { + auto c = Convert(alpha, beta); + return c(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/convert.h b/libs/ultrainfer/ultrainfer/vision/common/processors/convert.h new file mode 100755 index 0000000000..9eeff4e36b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/convert.h @@ -0,0 +1,52 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { +/*! @brief Processor for convert images with given paramters. + */ +class ULTRAINFER_DECL Convert : public Processor { +public: + Convert(const std::vector &alpha, const std::vector &beta); + + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif + std::string Name() { return "Convert"; } + + // Compute `result = mat * alpha + beta` directly by channel. + // The default behavior is the same as OpenCV's convertTo method. + /** \brief Process the input images + * + * \param[in] mat The input image data,`result = mat * alpha + beta` + * \param[in] alpha The alpha channel data + * \param[in] beta The beta channel data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. 
+ * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, const std::vector &alpha, + const std::vector &beta, + ProcLib lib = ProcLib::DEFAULT); + +private: + std::vector alpha_; + std::vector beta_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.cc new file mode 100755 index 0000000000..2ab6ac638c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/convert_and_permute.h" + +namespace ultrainfer { +namespace vision { + +ConvertAndPermute::ConvertAndPermute(const std::vector &alpha, + const std::vector &beta, + bool swap_rb) { + FDASSERT(alpha.size() == beta.size(), "ConvertAndPermute: requires the size " + "of alpha equal to the size of beta."); + FDASSERT(alpha.size() > 0 && beta.size() > 0, + "ConvertAndPermute: requires the size of alpha and beta > 0."); + alpha_.assign(alpha.begin(), alpha.end()); + beta_.assign(beta.begin(), beta.end()); + swap_rb_ = swap_rb; +} + +bool ConvertAndPermute::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + std::vector split_im; + cv::split(*im, split_im); + if (swap_rb_) + std::swap(split_im[0], split_im[2]); + for (int c = 0; c < im->channels(); c++) { + split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); + } + cv::Mat res(origin_h, origin_w, CV_32FC(im->channels())); + for (int i = 0; i < im->channels(); ++i) { + cv::extractChannel(split_im[i], + cv::Mat(origin_h, origin_w, CV_32FC1, + res.ptr() + i * origin_h * origin_w * 4), + 0); + } + + mat->SetMat(res); + mat->layout = Layout::CHW; + return true; +} + +#ifdef ENABLE_FLYCV +bool ConvertAndPermute::ImplByFlyCV(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Only supports input with HWC layout." << std::endl; + return false; + } + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "Only supports 3-channels image in FlyCV, but now it's " + << im->channels() << "." 
<< std::endl; + return false; + } + std::vector mean(3, 0); + std::vector std(3, 0); + for (size_t i = 0; i < 3; ++i) { + std[i] = 1.0 / alpha_[i]; + mean[i] = -1 * beta_[i] * std[i]; + } + + std::vector channel_reorder_index = {0, 1, 2}; + if (swap_rb_) + std::swap(channel_reorder_index[0], channel_reorder_index[2]); + + fcv::Mat new_im; + fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index, + new_im, false); + mat->SetMat(new_im); + mat->layout = Layout::CHW; + return true; +} +#endif + +bool ConvertAndPermute::Run(FDMat *mat, const std::vector &alpha, + const std::vector &beta, bool swap_rb, + ProcLib lib) { + auto n = ConvertAndPermute(alpha, beta, swap_rb); + return n(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.h b/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.h new file mode 100755 index 0000000000..f00aad5f63 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/convert_and_permute.h @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { +/*! @brief Processor for convert images with given paramters and permute images + * from HWC to CHW. + */ +class ULTRAINFER_DECL ConvertAndPermute : public Processor { +public: + ConvertAndPermute(const std::vector &alpha = std::vector(), + const std::vector &beta = std::vector(), + bool swap_rb = false); + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif + std::string Name() { return "ConvertAndPermute"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data,`result = mat * alpha + beta` + * \param[in] alpha The alpha channel data + * \param[in] beta The beta channel data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. 
+ * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, const std::vector &alpha, + const std::vector &beta, bool swap_rb = false, + ProcLib lib = ProcLib::DEFAULT); + + std::vector GetAlpha() const { return alpha_; } + + /** \brief Process the input images + * + * \param[in] alpha set the value of the alpha parameter + */ + void SetAlpha(const std::vector &alpha) { + alpha_.clear(); + std::vector().swap(alpha_); + alpha_.assign(alpha.begin(), alpha.end()); + } + + std::vector GetBeta() const { return beta_; } + + /** \brief Process the input images + * + * \param[in] beta set the value of the beta parameter + */ + void SetBeta(const std::vector &beta) { + beta_.clear(); + std::vector().swap(beta_); + beta_.assign(beta.begin(), beta.end()); + } + + bool GetSwapRB() { return swap_rb_; } + + /** \brief Process the input images + * + * \param[in] swap_rb set the value of the swap_rb parameter + */ + void SetSwapRB(bool swap_rb) { swap_rb_ = swap_rb; } + +private: + std::vector alpha_; + std::vector beta_; + bool swap_rb_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/crop.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/crop.cc new file mode 100755 index 0000000000..1a8011dbfc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/crop.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/crop.h" + +namespace ultrainfer { +namespace vision { + +bool Crop::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int height = static_cast(im->rows); + int width = static_cast(im->cols); + if (height < height_ + offset_h_ || width < width_ + offset_w_) { + FDERROR << "[Crop] Cannot crop [" << height_ << ", " << width_ + << "] from the input image [" << height << ", " << width + << "], with offset [" << offset_h_ << ", " << offset_w_ << "]." + << std::endl; + return false; + } + cv::Rect crop_roi(offset_w_, offset_h_, width_, height_); + cv::Mat new_im = (*im)(crop_roi).clone(); + mat->SetMat(new_im); + mat->SetWidth(width_); + mat->SetHeight(height_); + return true; +} + +#ifdef ENABLE_FLYCV +bool Crop::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + int height = static_cast(im->height()); + int width = static_cast(im->width()); + if (height < height_ + offset_h_ || width < width_ + offset_w_) { + FDERROR << "[Crop] Cannot crop [" << height_ << ", " << width_ + << "] from the input image [" << height << ", " << width + << "], with offset [" << offset_h_ << ", " << offset_w_ << "]." 
+ << std::endl; + return false; + } + fcv::Rect crop_roi(offset_w_, offset_h_, width_, height_); + fcv::Mat new_im; + fcv::crop(*im, new_im, crop_roi); + mat->SetMat(new_im); + mat->SetWidth(width_); + mat->SetHeight(height_); + return true; +} +#endif + +bool Crop::Run(Mat *mat, int offset_w, int offset_h, int width, int height, + ProcLib lib) { + auto c = Crop(offset_w, offset_h, width, height); + return c(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/crop.h b/libs/ultrainfer/ultrainfer/vision/common/processors/crop.h new file mode 100755 index 0000000000..b40dd97015 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/crop.h @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for crop images with given paramters. + */ +class ULTRAINFER_DECL Crop : public Processor { +public: + Crop(int offset_w, int offset_h, int width, int height) { + offset_w_ = offset_w; + offset_h_ = offset_h; + width_ = width; + height_ = height; + } + + bool ImplByOpenCV(Mat *mat); + +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif + std::string Name() { return "Crop"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] offset_w The offset of width. + * \param[in] offset_h The offset of height. + * \param[in] width The width of the output image. + * \param[in] height The height of the output image. + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, int offset_w, int offset_h, int width, int height, + ProcLib lib = ProcLib::DEFAULT); + +private: + int offset_w_; + int offset_h_; + int height_; + int width_; +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.cc new file mode 100755 index 0000000000..9841af7df1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.cc @@ -0,0 +1,127 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
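// Illustrative sketch (not part of this patch): mapping the usual (mean, std)
// normalization onto ConvertAndPermute's per-channel alpha / beta. The
// ImageNet mean/std values below are placeholders, not something this patch
// prescribes. Since result = mat * alpha + beta (then HWC -> CHW, optionally
// swapping B/R first), x' = (x / 255 - mean) / std gives
//   alpha = 1 / (255 * std),  beta = -mean / std.
std::vector<float> mean = {0.485f, 0.456f, 0.406f};
std::vector<float> stdv = {0.229f, 0.224f, 0.225f};
std::vector<float> alpha(3), beta(3);
for (int c = 0; c < 3; ++c) {
  alpha[c] = 1.0f / (255.0f * stdv[c]);
  beta[c] = -mean[c] / stdv[c];
}
ultrainfer::vision::ConvertAndPermute::Run(&mat, alpha, beta, /*swap_rb=*/true);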
+ +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" + +namespace ultrainfer { +namespace vision { + +#ifdef ENABLE_CVCUDA +nvcv::ImageFormat CreateCvCudaImageFormat(FDDataType type, int channel, + bool interleaved) { + FDASSERT(channel == 1 || channel == 3 || channel == 4, + "Only support channel be 1/3/4 in CV-CUDA."); + if (type == FDDataType::UINT8) { + if (channel == 1) { + return nvcv::FMT_U8; + } else if (channel == 3) { + return (interleaved ? nvcv::FMT_BGR8 : nvcv::FMT_BGR8p); + } else { + return (interleaved ? nvcv::FMT_BGRA8 : nvcv::FMT_BGRA8p); + } + } else if (type == FDDataType::FP32) { + if (channel == 1) { + return nvcv::FMT_F32; + } else if (channel == 3) { + return (interleaved ? nvcv::FMT_BGRf32 : nvcv::FMT_BGRf32p); + } else { + return (interleaved ? nvcv::FMT_BGRAf32 : nvcv::FMT_BGRAf32p); + } + } + FDASSERT(false, "Data type of %s is not supported.", Str(type).c_str()); + return nvcv::FMT_BGRf32; +} + +std::shared_ptr +CreateCvCudaTensorWrapData(const FDTensor &tensor, Layout layout) { + FDASSERT(tensor.shape.size() == 3, "When create CVCUDA tensor from FD tensor," + "tensor shape should be 3-Dim,"); + int batchsize = 1; + int h = tensor.Shape()[0]; + int w = tensor.Shape()[1]; + int c = tensor.Shape()[2]; + + nvcv::TensorDataStridedCuda::Buffer buf; + buf.strides[3] = FDDataTypeSize(tensor.Dtype()); + buf.strides[2] = c * buf.strides[3]; + buf.strides[1] = w * buf.strides[2]; + buf.strides[0] = h * buf.strides[1]; + if (layout == Layout::CHW) { + c = tensor.Shape()[0]; + h = tensor.Shape()[1]; + w = tensor.Shape()[2]; + buf.strides[3] = FDDataTypeSize(tensor.Dtype()); + buf.strides[2] = w * buf.strides[3]; + buf.strides[1] = h * buf.strides[2]; + buf.strides[0] = c * buf.strides[1]; + } + buf.basePtr = reinterpret_cast(const_cast(tensor.Data())); + + nvcv::Tensor::Requirements req = nvcv::Tensor::CalcRequirements( + batchsize, {w, h}, + CreateCvCudaImageFormat(tensor.Dtype(), c, layout == Layout::HWC)); + + nvcv::TensorDataStridedCuda tensor_data( + nvcv::TensorShape{req.shape, req.rank, req.layout}, + nvcv::DataType{req.dtype}, buf); + return std::make_shared(tensor_data, nullptr); +} + +void *GetCvCudaTensorDataPtr(const nvcv::TensorWrapData &tensor) { + auto data = + dynamic_cast(tensor.exportData()); + return reinterpret_cast(data->basePtr()); +} + +nvcv::ImageWrapData CreateImageWrapData(const FDTensor &tensor) { + FDASSERT(tensor.shape.size() == 3, + "When create CVCUDA image from FD tensor," + "tensor shape should be 3-Dim, HWC layout"); + int h = tensor.Shape()[0]; + int w = tensor.Shape()[1]; + int c = tensor.Shape()[2]; + nvcv::ImageDataStridedCuda::Buffer buf; + buf.numPlanes = 1; + buf.planes[0].width = w; + buf.planes[0].height = h; + buf.planes[0].rowStride = w * c * FDDataTypeSize(tensor.Dtype()); + buf.planes[0].basePtr = + reinterpret_cast(const_cast(tensor.Data())); + nvcv::ImageWrapData nvimg{nvcv::ImageDataStridedCuda{ + nvcv::ImageFormat{CreateCvCudaImageFormat(tensor.Dtype(), c)}, buf}}; + return nvimg; +} + +void CreateCvCudaImageBatchVarShape(std::vector &tensors, + nvcv::ImageBatchVarShape &img_batch) { + for (size_t i = 0; i < tensors.size(); ++i) { + FDASSERT(tensors[i]->device == Device::GPU, "Tensor must on GPU."); + img_batch.pushBack(CreateImageWrapData(*(tensors[i]))); + } +} + +NVCVInterpolationType CreateCvCudaInterp(int interp) { + // CV-CUDA Interp value is compatible with OpenCV + auto nvcv_interp = NVCVInterpolationType(interp); + + // Due to bug of CV-CUDA CUBIC resize, will force to convert CUBIC to LINEAR + if 
(nvcv_interp == NVCV_INTERP_CUBIC) { + return NVCV_INTERP_LINEAR; + } + return nvcv_interp; +} +#endif + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.h b/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.h new file mode 100755 index 0000000000..701e88cb09 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/cvcuda_utils.h @@ -0,0 +1,40 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/vision/common/processors/mat.h" + +#ifdef ENABLE_CVCUDA +#include +#include +#include + +namespace ultrainfer { +namespace vision { + +nvcv::ImageFormat CreateCvCudaImageFormat(FDDataType type, int channel, + bool interleaved = true); +std::shared_ptr +CreateCvCudaTensorWrapData(const FDTensor &tensor, Layout layout = Layout::HWC); +void *GetCvCudaTensorDataPtr(const nvcv::TensorWrapData &tensor); +nvcv::ImageWrapData CreateImageWrapData(const FDTensor &tensor); +void CreateCvCudaImageBatchVarShape(std::vector &tensors, + nvcv::ImageBatchVarShape &img_batch); +NVCVInterpolationType CreateCvCudaInterp(int interp); + +} // namespace vision +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.cc new file mode 100755 index 0000000000..be32cf1809 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/hwc2chw.h" + +#include "ultrainfer/function/transpose.h" + +namespace ultrainfer { +namespace vision { +bool HWC2CHW::ImplByOpenCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "HWC2CHW: The input data is not Layout::HWC format!" 
+ << std::endl; + return false; + } + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat im_clone = im->clone(); + int rh = im->rows; + int rw = im->cols; + int rc = im->channels(); + + for (int i = 0; i < rc; ++i) { + cv::extractChannel( + im_clone, + cv::Mat(rh, rw, im->type() % 8, + im->ptr() + i * rh * rw * FDDataTypeSize(mat->Type())), + i); + } + mat->layout = Layout::CHW; + return true; +} + +#ifdef ENABLE_FLYCV +bool HWC2CHW::ImplByFlyCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "HWC2CHW: The input data is not Layout::HWC format!" + << std::endl; + return false; + } + if (mat->Type() != FDDataType::FP32) { + FDERROR << "HWC2CHW: Only support float data while use FlyCV, but now it's " + << mat->Type() << "." << std::endl; + return false; + } + fcv::Mat *im = mat->GetFlyCVMat(); + fcv::Mat new_im; + fcv::normalize_to_submean_to_reorder(*im, {0.0, 0.0, 0.0}, {1.0, 1.0, 1.0}, + std::vector(), new_im, false); + mat->SetMat(new_im); + mat->layout = Layout::CHW; + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool HWC2CHW::ImplByCvCuda(FDMat *mat) { + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + // Prepare output tensor + mat->output_cache->Resize({mat->Channels(), mat->Height(), mat->Width()}, + src->Dtype(), "output_cache", Device::GPU); + auto dst_tensor = + CreateCvCudaTensorWrapData(*(mat->output_cache), Layout::CHW); + + cvcuda_reformat_op_(mat->Stream(), *src_tensor, *dst_tensor); + + mat->layout = Layout::CHW; + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool HWC2CHW::Run(Mat *mat, ProcLib lib) { + auto h = HWC2CHW(); + return h(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.h b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.h new file mode 100755 index 0000000000..6d042e724d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw.h @@ -0,0 +1,54 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for transform images from HWC to CHW. + */ +class ULTRAINFER_DECL HWC2CHW : public Processor { +public: + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "HWC2CHW"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. 
+ * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, ProcLib lib = ProcLib::DEFAULT); + +private: +#ifdef ENABLE_CVCUDA + cvcuda::Reformat cvcuda_reformat_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw_pybind.cc new file mode 100755 index 0000000000..67598277fa --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/hwc2chw_pybind.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindHWC2CHW(pybind11::module &m) { + pybind11::class_(m, "HWC2CHW") + .def(pybind11::init<>(), "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.cc new file mode 100755 index 0000000000..96934c2ab0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.cc @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
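A minimal sketch (illustrative, not part of the patch) showing how HWC2CHW is typically applied to a wrapped OpenCV image; the image path is only an example:

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision/common/processors/hwc2chw.h"
#include "ultrainfer/vision/common/processors/mat.h"

int main() {
  cv::Mat image = cv::imread("test.jpg");  // illustrative input
  auto mat = ultrainfer::vision::WrapMat(image);
  // Transpose the packed HWC image to planar CHW in place; on success the
  // Mat's layout field is updated to Layout::CHW.
  if (!ultrainfer::vision::HWC2CHW::Run(&mat)) {
    return 1;
  }
  return 0;
}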
+ +#include "ultrainfer/vision/common/processors/limit_by_stride.h" + +namespace ultrainfer { +namespace vision { + +bool LimitByStride::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + int rw = origin_w - origin_w % stride_; + int rh = origin_h - origin_h % stride_; + if (rw == 0) { + rw = stride_; + } + if (rh == 0) { + rh = stride_; + } + if (rw != origin_w || rh != origin_w) { + cv::resize(*im, *im, cv::Size(rw, rh), 0, 0, interp_); + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + } + return true; +} + +#ifdef ENABLE_FLYCV +bool LimitByStride::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + int origin_w = im->width(); + int origin_h = im->height(); + int rw = origin_w - origin_w % stride_; + int rh = origin_h - origin_h % stride_; + if (rw == 0) { + rw = stride_; + } + if (rh == 0) { + rh = stride_; + } + if (rw != origin_w || rh != origin_h) { + auto interp_method = fcv::InterpolationType::INTER_LINEAR; + if (interp_ == 0) { + interp_method = fcv::InterpolationType::INTER_NEAREST; + } else if (interp_ == 1) { + interp_method = fcv::InterpolationType::INTER_LINEAR; + } else if (interp_ == 2) { + interp_method = fcv::InterpolationType::INTER_CUBIC; + } else if (interp_ == 3) { + interp_method = fcv::InterpolationType::INTER_AREA; + } else { + FDERROR + << "LimitByStride: Only support interp_ be 0/1/2/3 with FlyCV, but " + "now it's " + << interp_ << "." << std::endl; + return false; + } + + fcv::Mat new_im; + fcv::resize(*im, new_im, fcv::Size(rw, rh), 0, 0, interp_method); + mat->SetMat(new_im); + mat->SetWidth(new_im.width()); + mat->SetHeight(new_im.height()); + } + return true; +} +#endif + +bool LimitByStride::Run(Mat *mat, int stride, int interp, ProcLib lib) { + auto r = LimitByStride(stride, interp); + return r(mat, lib); +} +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.h b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.h new file mode 100755 index 0000000000..f905a60e1c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_by_stride.h @@ -0,0 +1,54 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for LimitByStride images with given paramters. + */ +class ULTRAINFER_DECL LimitByStride : public Processor { +public: + explicit LimitByStride(int stride = 32, int interp = 1) { + stride_ = stride; + interp_ = interp; + } + + // Resize Mat* mat to make the size divisible by stride_. 
+ bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif + std::string Name() { return "LimitByStride"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] stride limit image stride, deafult is 32 + * \param[in] interp interpolation method, deafult is 1 + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, int stride = 32, int interp = 1, + ProcLib lib = ProcLib::DEFAULT); + +private: + int interp_; + int stride_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.cc new file mode 100755 index 0000000000..93d80394cc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/limit_short.h" + +namespace ultrainfer { +namespace vision { + +bool LimitShort::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + int im_size_min = std::min(origin_w, origin_h); + int target = im_size_min; + if (max_short_ > 0 && im_size_min > max_short_) { + target = max_short_; + } else if (min_short_ > 0 && im_size_min < min_short_) { + target = min_short_; + } + double scale = -1.f; + if (target != im_size_min) { + scale = static_cast(target) / static_cast(im_size_min); + } + if (fabs(scale - 1.0) > 1e-06) { + cv::resize(*im, *im, cv::Size(), scale, scale, interp_); + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + } + return true; +} + +#ifdef ENABLE_FLYCV +bool LimitShort::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + int origin_w = im->width(); + int origin_h = im->height(); + int im_size_min = std::min(origin_w, origin_h); + int target = im_size_min; + if (max_short_ > 0 && im_size_min > max_short_) { + target = max_short_; + } else if (min_short_ > 0 && im_size_min < min_short_) { + target = min_short_; + } + double scale = -1.f; + if (target != im_size_min) { + scale = static_cast(target) / static_cast(im_size_min); + } + if (fabs(scale - 1.0) > 1e-06) { + auto interp_method = fcv::InterpolationType::INTER_LINEAR; + if (interp_ == 0) { + interp_method = fcv::InterpolationType::INTER_NEAREST; + } else if (interp_ == 1) { + interp_method = fcv::InterpolationType::INTER_LINEAR; + } else if (interp_ == 2) { + interp_method = fcv::InterpolationType::INTER_CUBIC; + } else if (interp_ == 3) { + interp_method = fcv::InterpolationType::INTER_AREA; + } else { + FDERROR + << "LimitByShort: Only support interp_ be 0/1/2/3 with FlyCV, but " + "now it's " + << interp_ << "." 
<< std::endl; + return false; + } + + fcv::Mat new_im; + fcv::resize(*im, new_im, fcv::Size(), scale, scale, interp_method); + mat->SetMat(new_im); + mat->SetWidth(new_im.width()); + mat->SetHeight(new_im.height()); + } + return true; +} +#endif + +bool LimitShort::Run(Mat *mat, int max_short, int min_short, int interp, + ProcLib lib) { + auto l = LimitShort(max_short, min_short, interp); + return l(mat, lib); +} +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.h b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.h new file mode 100755 index 0000000000..cc991878c2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/limit_short.h @@ -0,0 +1,62 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for Limit images by short edge with given paramters. + */ +class LimitShort : public Processor { +public: + explicit LimitShort(int max_short = -1, int min_short = -1, int interp = 1) { + max_short_ = max_short; + min_short_ = min_short; + interp_ = interp; + } + + // Limit the short edge of image. + // If the short edge is larger than max_short_, resize the short edge + // to max_short_, while scale the long edge proportionally. + // If the short edge is smaller than min_short_, resize the short edge + // to min_short_, while scale the long edge proportionally. + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif + std::string Name() { return "LimitShort"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data + * \param[in] max_short target size of short edge + * \param[in] min_short target size of short edge + * \param[in] interp interpolation method, deafult is 1 + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, int max_short = -1, int min_short = -1, + int interp = 1, ProcLib lib = ProcLib::DEFAULT); + int GetMaxShort() const { return max_short_; } + +private: + int max_short_; + int min_short_; + int interp_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/manager.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/manager.cc new file mode 100755 index 0000000000..f03198aa07 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/manager.cc @@ -0,0 +1,102 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
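A sketch (illustrative, not part of the patch) of how LimitShort and LimitByStride compose, with a worked size example in the comments:

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision/common/processors/limit_by_stride.h"
#include "ultrainfer/vision/common/processors/limit_short.h"
#include "ultrainfer/vision/common/processors/mat.h"

int main() {
  cv::Mat image = cv::imread("test.jpg");  // illustrative input, e.g. 500x375
  auto mat = ultrainfer::vision::WrapMat(image);
  // LimitShort with min_short=640 scales a 500x375 image by 640/375, giving
  // roughly 853x640; LimitByStride(32) then rounds both edges down to
  // multiples of 32, giving 832x640.
  bool ok = ultrainfer::vision::LimitShort::Run(&mat, /*max_short=*/-1,
                                                /*min_short=*/640) &&
            ultrainfer::vision::LimitByStride::Run(&mat, /*stride=*/32);
  return ok ? 0 : 1;
}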
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/common/processors/manager.h" + +namespace ultrainfer { +namespace vision { + +ProcessorManager::~ProcessorManager() { +#ifdef WITH_GPU + if (stream_) + cudaStreamDestroy(stream_); +#endif +} + +void ProcessorManager::UseCuda(bool enable_cv_cuda, int gpu_id) { +#ifdef WITH_GPU + if (gpu_id >= 0) { + device_id_ = gpu_id; + FDASSERT(cudaSetDevice(device_id_) == cudaSuccess, + "[ERROR] Error occurs while setting cuda device."); + } + FDASSERT(cudaStreamCreate(&stream_) == cudaSuccess, + "[ERROR] Error occurs while creating cuda stream."); + proc_lib_ = ProcLib::CUDA; +#else + FDASSERT(false, "UltraInfer didn't compile with WITH_GPU."); +#endif + + if (enable_cv_cuda) { +#ifdef ENABLE_CVCUDA + proc_lib_ = ProcLib::CVCUDA; +#else + FDASSERT(false, "UltraInfer didn't compile with CV-CUDA."); +#endif + } +} + +bool ProcessorManager::CudaUsed() { + return (proc_lib_ == ProcLib::CUDA || proc_lib_ == ProcLib::CVCUDA); +} + +void ProcessorManager::PreApply(FDMatBatch *image_batch) { + FDASSERT(image_batch->mats != nullptr, "The mats is empty."); + FDASSERT(image_batch->mats->size() > 0, + "The size of input images should be greater than 0."); + + if (image_batch->mats->size() > input_caches_.size()) { + input_caches_.resize(image_batch->mats->size()); + output_caches_.resize(image_batch->mats->size()); + } + image_batch->input_cache = &batch_input_cache_; + image_batch->output_cache = &batch_output_cache_; + image_batch->proc_lib = proc_lib_; + if (CudaUsed()) { + SetStream(image_batch); + } + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + mat->input_cache = &input_caches_[i]; + mat->output_cache = &output_caches_[i]; + mat->proc_lib = proc_lib_; + if (mat->mat_type == ProcLib::CUDA) { + // Make a copy of the input data ptr, so that the original data ptr of + // FDMat won't be modified. + auto fd_tensor = std::make_shared(); + fd_tensor->SetExternalData(mat->Tensor()->shape, mat->Tensor()->Dtype(), + mat->Tensor()->Data(), mat->Tensor()->device, + mat->Tensor()->device_id); + mat->SetTensor(fd_tensor); + } + } +} + +void ProcessorManager::PostApply() { + if (CudaUsed()) { + SyncStream(); + } +} + +bool ProcessorManager::Run(std::vector *images, + std::vector *outputs) { + FDMatBatch image_batch(images); + PreApply(&image_batch); + bool ret = Apply(&image_batch, outputs); + PostApply(); + return ret; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/manager.h b/libs/ultrainfer/ultrainfer/vision/common/processors/manager.h new file mode 100755 index 0000000000..80950e9db9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/manager.h @@ -0,0 +1,104 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/processors/base.h" +#include "ultrainfer/vision/common/processors/mat.h" +#include "ultrainfer/vision/common/processors/mat_batch.h" + +namespace ultrainfer { +namespace vision { + +/*! @brief ProcessorManager for Preprocess + */ +class ULTRAINFER_DECL ProcessorManager { +public: + ~ProcessorManager(); + + /** \brief Use CUDA to boost the performance of processors + * + * \param[in] enable_cv_cuda ture: use CV-CUDA, false: use CUDA only + * \param[in] gpu_id GPU device id + * \return true if the preprocess successed, otherwise false + */ + void UseCuda(bool enable_cv_cuda = false, int gpu_id = -1); + + bool CudaUsed(); + +#ifdef WITH_GPU + cudaStream_t Stream() const { return stream_; } +#endif + + void SetStream(FDMat *mat) { +#ifdef WITH_GPU + mat->SetStream(stream_); +#endif + } + + void SetStream(FDMatBatch *mat_batch) { +#ifdef WITH_GPU + mat_batch->SetStream(stream_); +#endif + } + + void SyncStream() { +#ifdef WITH_GPU + FDASSERT(cudaStreamSynchronize(stream_) == cudaSuccess, + "[ERROR] Error occurs while sync cuda stream."); +#endif + } + + int DeviceId() { return device_id_; } + + /** \brief Process the input images and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs); + + /** \brief Apply() is the body of Run() function, it needs to be implemented + * by a derived class + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, + std::vector *outputs) = 0; + + void PreApply(FDMatBatch *image_batch); + + void PostApply(); + +protected: + ProcLib proc_lib_ = ProcLib::DEFAULT; + +private: +#ifdef WITH_GPU + cudaStream_t stream_ = nullptr; +#endif + int device_id_ = -1; + + std::vector input_caches_; + std::vector output_caches_; + FDTensor batch_input_cache_; + FDTensor batch_output_cache_; +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/manager_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/manager_pybind.cc new file mode 100755 index 0000000000..e97a4aca6a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/manager_pybind.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
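Since Apply() is pure virtual, every concrete preprocessor derives from ProcessorManager. A minimal sketch follows (illustrative, not part of the patch; the class name and the single-step processor chain are made up, the elided vector element types are assumed to be FDMat and FDTensor, and the call operator signature is assumed to match the Run() helpers above):

#include <vector>

#include "ultrainfer/vision/common/processors/hwc2chw.h"
#include "ultrainfer/vision/common/processors/manager.h"

namespace ultrainfer {
namespace vision {

class MyPreprocessor : public ProcessorManager {  // illustrative name
public:
  // Apply() receives the batch prepared by PreApply() and fills the tensors
  // that will be fed to the runtime.
  bool Apply(FDMatBatch *image_batch, std::vector<FDTensor> *outputs) override {
    for (size_t i = 0; i < image_batch->mats->size(); ++i) {
      if (!hwc2chw_(&(image_batch->mats->at(i)), image_batch->proc_lib)) {
        return false;
      }
    }
    outputs->resize(1);
    FDTensor *batched = image_batch->Tensor();  // packed batch tensor
    (*outputs)[0].SetExternalData(batched->Shape(), batched->Dtype(),
                                  batched->Data(), batched->device,
                                  batched->device_id);
    return true;
  }

private:
  HWC2CHW hwc2chw_;
};

}  // namespace vision
}  // namespace ultrainfer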
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +namespace vision { +// PyProcessorManager is used for pybind11::init() of ProcessorManager +// Because ProcessorManager have a pure Virtual function Apply() +class ULTRAINFER_DECL PyProcessorManager : public ProcessorManager { +public: + using ProcessorManager::ProcessorManager; + bool Apply(FDMatBatch *image_batch, std::vector *outputs) override { + PYBIND11_OVERRIDE_PURE(bool, ProcessorManager, Apply, image_batch, outputs); + } +}; +} // namespace vision + +void BindProcessorManager(pybind11::module &m) { + pybind11::class_( + m, "ProcessorManager") + .def(pybind11::init<>()) + .def("run", + [](vision::ProcessorManager &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error("Failed to process the input data"); + } + if (!self.CudaUsed()) { + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + } + return outputs; + }) + .def("pre_apply", &vision::ProcessorManager::PreApply) + .def("post_apply", &vision::ProcessorManager::PostApply) + .def("use_cuda", + [](vision::ProcessorManager &self, bool enable_cv_cuda = false, + int gpu_id = -1) { self.UseCuda(enable_cv_cuda, gpu_id); }); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/mat.cc new file mode 100755 index 0000000000..23660879ec --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat.cc @@ -0,0 +1,337 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/common/processors/mat.h" + +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/processors/utils.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat *Mat::GetOpenCVMat() { + if (mat_type == ProcLib::OPENCV) { + return &cpu_mat; + } else if (mat_type == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + // Just a reference to fcv_mat, zero copy. After you + // call this method, cpu_mat and fcv_mat will point + // to the same memory buffer. 
+ cpu_mat = ConvertFlyCVMatToOpenCV(fcv_mat); + mat_type = ProcLib::OPENCV; + return &cpu_mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } else if (mat_type == ProcLib::CUDA || mat_type == ProcLib::CVCUDA) { +#ifdef WITH_GPU + FDASSERT(cudaStreamSynchronize(stream) == cudaSuccess, + "[ERROR] Error occurs while sync cuda stream."); + cpu_mat = CreateZeroCopyOpenCVMatFromTensor(*fd_tensor, layout); + mat_type = ProcLib::OPENCV; + device = Device::CPU; + return &cpu_mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with -DWITH_GPU=ON"); +#endif + } else { + FDASSERT(false, "The mat_type of custom Mat can not be ProcLib::DEFAULT"); + } +} + +#ifdef ENABLE_FLYCV +fcv::Mat *Mat::GetFlyCVMat() { + if (mat_type == ProcLib::FLYCV) { + return &fcv_mat; + } else if (mat_type == ProcLib::OPENCV) { + // Just a reference to cpu_mat, zero copy. After you + // call this method, fcv_mat and cpu_mat will point + // to the same memory buffer. + fcv_mat = ConvertOpenCVMatToFlyCV(cpu_mat); + mat_type = ProcLib::FLYCV; + return &fcv_mat; + } else { + FDASSERT(false, "The mat_type of custom Mat can not be ProcLib::DEFAULT"); + } +} +#endif + +void *Mat::Data() { + if (mat_type == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + return fcv_mat.data(); +#else + FDASSERT(false, + "UltraInfer didn't compile with FlyCV, but met data type with " + "fcv::Mat."); +#endif + } else if (device == Device::GPU) { + return fd_tensor->Data(); + } + return cpu_mat.ptr(); +} + +FDTensor *Mat::Tensor() { + if (mat_type == ProcLib::OPENCV) { + ShareWithTensor(fd_tensor.get()); + } else if (mat_type == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + cpu_mat = ConvertFlyCVMatToOpenCV(fcv_mat); + mat_type = ProcLib::OPENCV; + ShareWithTensor(fd_tensor.get()); +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } + return fd_tensor.get(); +} + +void Mat::SetTensor(FDTensor *tensor) { + fd_tensor->SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), + tensor->device, tensor->device_id); + device = tensor->device; + if (layout == Layout::HWC) { + height = tensor->Shape()[0]; + width = tensor->Shape()[1]; + channels = tensor->Shape()[2]; + } else if (layout == Layout::CHW) { + channels = tensor->Shape()[0]; + height = tensor->Shape()[1]; + width = tensor->Shape()[2]; + } +} + +void Mat::SetTensor(std::shared_ptr &tensor) { + fd_tensor = tensor; + device = tensor->device; + if (layout == Layout::HWC) { + height = tensor->Shape()[0]; + width = tensor->Shape()[1]; + channels = tensor->Shape()[2]; + } else if (layout == Layout::CHW) { + channels = tensor->Shape()[0]; + height = tensor->Shape()[1]; + width = tensor->Shape()[2]; + } +} + +void Mat::ShareWithTensor(FDTensor *tensor) { + tensor->SetExternalData({Channels(), Height(), Width()}, Type(), Data()); + tensor->device = device; + if (layout == Layout::HWC) { + tensor->shape = {Height(), Width(), Channels()}; + } +} + +bool Mat::CopyToTensor(FDTensor *tensor) { + int total_bytes = Height() * Width() * Channels() * FDDataTypeSize(Type()); + if (total_bytes != tensor->Nbytes()) { + FDERROR << "While copy Mat to Tensor, requires the memory size be same, " + "but now size of Tensor = " + << tensor->Nbytes() << ", size of Mat = " << total_bytes << "." 
+ << std::endl; + return false; + } + memcpy(tensor->MutableData(), Data(), total_bytes); + return true; +} + +void Mat::PrintInfo(const std::string &flag) { + std::cout << flag << ": " + << "DataType=" << Type() << ", " + << "Channel=" << Channels() << ", " + << "Height=" << Height() << ", " + << "Width=" << Width() << ", " + << "Mean="; + if (mat_type == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + fcv::Scalar mean = fcv::mean(fcv_mat); + for (int i = 0; i < Channels(); ++i) { + std::cout << mean[i] << " "; + } + std::cout << std::endl; +#else + FDASSERT(false, + "UltraInfer didn't compile with FlyCV, but met data type with " + "fcv::Mat."); +#endif + } else if (mat_type == ProcLib::OPENCV) { + cv::Scalar mean = cv::mean(cpu_mat); + for (int i = 0; i < Channels(); ++i) { + std::cout << mean[i] << " "; + } + std::cout << std::endl; + } else if (mat_type == ProcLib::CUDA || mat_type == ProcLib::CVCUDA) { +#ifdef WITH_GPU + FDASSERT(cudaStreamSynchronize(stream) == cudaSuccess, + "[ERROR] Error occurs while sync cuda stream."); + cv::Mat tmp_mat = CreateZeroCopyOpenCVMatFromTensor(*fd_tensor, layout); + cv::Scalar mean = cv::mean(tmp_mat); + for (int i = 0; i < Channels(); ++i) { + std::cout << mean[i] << " "; + } + std::cout << std::endl; +#else + FDASSERT(false, "UltraInfer didn't compiled with -DWITH_GPU=ON"); +#endif + } +} + +FDDataType Mat::Type() { + int type = -1; + if (mat_type == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + return FlyCVDataTypeToFD(fcv_mat.type()); +#else + FDASSERT(false, + "UltraInfer didn't compile with FlyCV, but met data type with " + "fcv::Mat."); +#endif + } else if (mat_type == ProcLib::CUDA || mat_type == ProcLib::CVCUDA) { + return fd_tensor->Dtype(); + } + return OpenCVDataTypeToFD(cpu_mat.type()); +} + +Mat Mat::Create(const FDTensor &tensor) { + if (DefaultProcLib::default_lib == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + fcv::Mat tmp_fcv_mat = CreateZeroCopyFlyCVMatFromTensor(tensor); + Mat mat = Mat(tmp_fcv_mat); + return mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } + cv::Mat tmp_ocv_mat = CreateZeroCopyOpenCVMatFromTensor(tensor); + Mat mat = Mat(tmp_ocv_mat); + return mat; +} + +Mat Mat::Create(const FDTensor &tensor, ProcLib lib) { + if (lib == ProcLib::DEFAULT) { + return Create(tensor); + } + if (lib == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + fcv::Mat tmp_fcv_mat = CreateZeroCopyFlyCVMatFromTensor(tensor); + Mat mat = Mat(tmp_fcv_mat); + return mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } + cv::Mat tmp_ocv_mat = CreateZeroCopyOpenCVMatFromTensor(tensor); + Mat mat = Mat(tmp_ocv_mat); + return mat; +} + +Mat Mat::Create(int height, int width, int channels, FDDataType type, + void *data) { + if (DefaultProcLib::default_lib == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + fcv::Mat tmp_fcv_mat = + CreateZeroCopyFlyCVMatFromBuffer(height, width, channels, type, data); + Mat mat = Mat(tmp_fcv_mat); + return mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } + cv::Mat tmp_ocv_mat = + CreateZeroCopyOpenCVMatFromBuffer(height, width, channels, type, data); + Mat mat = Mat(tmp_ocv_mat); + return mat; +} + +Mat Mat::Create(int height, int width, int channels, FDDataType type, + void *data, ProcLib lib) { + if (lib == ProcLib::DEFAULT) { + return Create(height, width, channels, type, data); + } + if (lib == ProcLib::FLYCV) { +#ifdef ENABLE_FLYCV + fcv::Mat tmp_fcv_mat = + CreateZeroCopyFlyCVMatFromBuffer(height, width, channels, type, data); + Mat mat = 
Mat(tmp_fcv_mat); + return mat; +#else + FDASSERT(false, "UltraInfer didn't compiled with FlyCV!"); +#endif + } + cv::Mat tmp_ocv_mat = + CreateZeroCopyOpenCVMatFromBuffer(height, width, channels, type, data); + Mat mat = Mat(tmp_ocv_mat); + return mat; +} + +FDMat WrapMat(const cv::Mat &image) { + FDMat mat(image); + return mat; +} + +std::vector WrapMat(const std::vector &images) { + std::vector mats; + for (size_t i = 0; i < images.size(); ++i) { + mats.emplace_back(FDMat(images[i])); + } + return mats; +} + +bool CheckShapeConsistency(std::vector *mats) { + if (mats == nullptr) { + return true; + } + for (size_t i = 1; i < mats->size(); ++i) { + if ((*mats)[i].Channels() != (*mats)[0].Channels() || + (*mats)[i].Width() != (*mats)[0].Width() || + (*mats)[i].Height() != (*mats)[0].Height()) { + return false; + } + } + return true; +} + +FDTensor *CreateCachedGpuInputTensor(Mat *mat) { +#ifdef WITH_GPU + FDTensor *src = mat->Tensor(); + // Need to make sure the tensor is pointed to the input_cache. + if (src->Data() == mat->output_cache->Data()) { + std::swap(mat->input_cache, mat->output_cache); + std::swap(mat->input_cache->name, mat->output_cache->name); + } + if (src->device == Device::GPU) { + return src; + } else if (src->device == Device::CPU) { + // Tensor on CPU, we need copy it from CPU to GPU + FDASSERT(src->Shape().size() == 3, "The CPU tensor must has 3 dims.") + mat->output_cache->Resize(src->Shape(), src->Dtype(), "output_cache", + Device::GPU); + FDASSERT(cudaMemcpyAsync(mat->output_cache->Data(), src->Data(), + src->Nbytes(), cudaMemcpyHostToDevice, + mat->Stream()) == 0, + "[ERROR] Error occurs while copy memory from CPU to GPU."); + std::swap(mat->input_cache, mat->output_cache); + std::swap(mat->input_cache->name, mat->output_cache->name); + return mat->input_cache; + } else { + FDASSERT(false, "FDMat is on unsupported device: %d", src->device); + } +#else + FDASSERT(false, "UltraInfer didn't compile with WITH_GPU."); +#endif + return nullptr; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat.h b/libs/ultrainfer/ultrainfer/vision/common/processors/mat.h new file mode 100755 index 0000000000..02fb71da96 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat.h @@ -0,0 +1,176 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "opencv2/core/core.hpp" +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/vision/common/processors/proc_lib.h" + +#ifdef ENABLE_FLYCV +#include "flycv.h" // NOLINT +#endif + +#ifdef WITH_GPU +#include +#endif + +namespace ultrainfer { +namespace vision { + +enum Layout { HWC, CHW }; + +/*! 
@brief FDMat is a structure for replace cv::Mat + */ +struct ULTRAINFER_DECL Mat { + Mat() = default; + explicit Mat(const cv::Mat &mat) { + cpu_mat = mat; + layout = Layout::HWC; + height = cpu_mat.rows; + width = cpu_mat.cols; + channels = cpu_mat.channels(); + mat_type = ProcLib::OPENCV; + } + +#ifdef ENABLE_FLYCV + explicit Mat(const fcv::Mat &mat) { + fcv_mat = mat; + layout = Layout::HWC; + height = fcv_mat.height(); + width = fcv_mat.width(); + channels = fcv_mat.channels(); + mat_type = ProcLib::FLYCV; + } +#endif + + Mat(const Mat &mat) = default; + Mat &operator=(const Mat &mat) = default; + + // Move constructor + Mat(Mat &&other) = default; + + // Careful if you use this interface + // this only used if you don't want to write + // the original data, and write to a new cv::Mat + // then replace the old cv::Mat of this structure + void SetMat(const cv::Mat &mat) { + cpu_mat = mat; + mat_type = ProcLib::OPENCV; + } + + cv::Mat *GetOpenCVMat(); + +#ifdef ENABLE_FLYCV + void SetMat(const fcv::Mat &mat) { + fcv_mat = mat; + mat_type = ProcLib::FLYCV; + } + fcv::Mat *GetFlyCVMat(); +#endif + + void *Data(); + + // Get fd_tensor + FDTensor *Tensor(); + + // Set fd_tensor + void SetTensor(FDTensor *tensor); + + void SetTensor(std::shared_ptr &tensor); + +private: + int channels; + int height; + int width; + cv::Mat cpu_mat; +#ifdef ENABLE_FLYCV + fcv::Mat fcv_mat; +#endif +#ifdef WITH_GPU + cudaStream_t stream = nullptr; +#endif + // Currently, fd_tensor is only used by CUDA and CV-CUDA, + // OpenCV and FlyCV are not using it. + std::shared_ptr fd_tensor = std::make_shared(); + +public: + FDDataType Type(); + int Channels() const { return channels; } + int Width() const { return width; } + int Height() const { return height; } + void SetChannels(int s) { channels = s; } + void SetWidth(int w) { width = w; } + void SetHeight(int h) { height = h; } + + // When using CV-CUDA/CUDA, please set input/output cache, + // refer to manager.cc + FDTensor *input_cache = nullptr; + FDTensor *output_cache = nullptr; +#ifdef WITH_GPU + cudaStream_t Stream() const { return stream; } + void SetStream(cudaStream_t s) { stream = s; } +#endif + + // Transfer the vision::Mat to FDTensor + void ShareWithTensor(FDTensor *tensor); + // Only support copy to cpu tensor now + bool CopyToTensor(FDTensor *tensor); + + // Debug functions + // TODO(jiangjiajun) Develop a right process pipeline with c++ + // is not a easy things, Will add more debug function here to + // help debug processed image. This function will print shape + // and mean of each channels of the Mat + void PrintInfo(const std::string &flag); + + ProcLib mat_type = ProcLib::OPENCV; + Layout layout = Layout::HWC; + Device device = Device::CPU; + ProcLib proc_lib = ProcLib::DEFAULT; + + // Create FD Mat from FD Tensor. This method only create a + // new FD Mat with zero copy and it's data pointer is reference + // to the original memory buffer of input FD Tensor. Carefully, + // any operation on this Mat may change memory that points to + // FDTensor. We assume that the memory Mat points to is mutable. + // This method will create a FD Mat according to current global + // default ProcLib (OPENCV,FLYCV,...). 
+ static Mat Create(const FDTensor &tensor); + static Mat Create(const FDTensor &tensor, ProcLib lib); + static Mat Create(int height, int width, int channels, FDDataType type, + void *data); + static Mat Create(int height, int width, int channels, FDDataType type, + void *data, ProcLib lib); +}; + +typedef Mat FDMat; +/* + * @brief Wrap a cv::Mat to FDMat, there's no memory copy, memory buffer is + * managed by user + */ +ULTRAINFER_DECL FDMat WrapMat(const cv::Mat &image); +/* + * Warp a vector to vector, there's no memory copy, memory + * buffer is managed by user + */ +ULTRAINFER_DECL std::vector WrapMat(const std::vector &images); + +bool CheckShapeConsistency(std::vector *mats); + +// Create an input tensor on GPU and save into input_cache. +// If the Mat is on GPU, return the mat->Tensor() directly. +// If the Mat is on CPU, then update the input cache tensor and copy the mat's +// CPU tensor to this new GPU input cache tensor. +FDTensor *CreateCachedGpuInputTensor(Mat *mat); +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.cc new file mode 100755 index 0000000000..88f4f53769 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
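A short sketch (illustrative, not part of the patch) of wrapping a cv::Mat into an FDMat and exposing it as a zero-copy FDTensor; the image path is only an example:

#include "opencv2/opencv.hpp"
#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/vision/common/processors/mat.h"

int main() {
  cv::Mat image = cv::imread("test.jpg");  // illustrative input
  ultrainfer::vision::FDMat mat = ultrainfer::vision::WrapMat(image);  // zero copy
  mat.PrintInfo("input");  // prints dtype, channels, height, width and per-channel mean

  // Share the HWC buffer with an FDTensor; no copy is made, so `image` must
  // stay alive for as long as `tensor` is used.
  ultrainfer::FDTensor tensor;
  mat.ShareWithTensor(&tensor);
  return 0;
}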
+#include "ultrainfer/vision/common/processors/mat_batch.h" + +namespace ultrainfer { +namespace vision { + +#ifdef WITH_GPU +void FDMatBatch::SetStream(cudaStream_t s) { + stream = s; + for (size_t i = 0; i < mats->size(); ++i) { + (*mats)[i].SetStream(s); + } +} +#endif + +FDTensor *FDMatBatch::Tensor() { + if (has_batched_tensor) { + return fd_tensor.get(); + } + FDASSERT(mats != nullptr, "Failed to get batched tensor, Mats are empty."); + FDASSERT(CheckShapeConsistency(mats), "Mats shapes are not consistent."); + // Each mat has its own tensor, + // to get a batched tensor, we need copy these tensors to a batched tensor + FDTensor *src = (*mats)[0].Tensor(); + device = src->device; + auto new_shape = src->Shape(); + new_shape.insert(new_shape.begin(), mats->size()); + input_cache->Resize(new_shape, src->Dtype(), "batch_input_cache", device); + for (size_t i = 0; i < mats->size(); ++i) { + FDASSERT(device == (*mats)[i].Tensor()->device, + "Mats and MatBatch are not on the same device"); + uint8_t *p = reinterpret_cast(input_cache->Data()); + int num_bytes = (*mats)[i].Tensor()->Nbytes(); + FDTensor::CopyBuffer(p + i * num_bytes, (*mats)[i].Tensor()->Data(), + num_bytes, device, false); + } + SetTensor(input_cache); + return fd_tensor.get(); +} + +void FDMatBatch::SetTensor(FDTensor *tensor) { + fd_tensor->SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), + tensor->device, tensor->device_id); + device = tensor->device; + has_batched_tensor = true; +} + +FDTensor *CreateCachedGpuInputTensor(FDMatBatch *mat_batch) { +#ifdef WITH_GPU + // Get the batched tensor + FDTensor *src = mat_batch->Tensor(); + // Need to make sure the returned tensor is pointed to the input_cache. + if (src->Data() == mat_batch->output_cache->Data()) { + std::swap(mat_batch->input_cache, mat_batch->output_cache); + std::swap(mat_batch->input_cache->name, mat_batch->output_cache->name); + } + if (src->device == Device::GPU) { + return src; + } else if (src->device == Device::CPU) { + // Batched tensor on CPU, we need copy it to GPU + mat_batch->output_cache->Resize(src->Shape(), src->Dtype(), "output_cache", + Device::GPU); + FDASSERT(cudaMemcpyAsync(mat_batch->output_cache->Data(), src->Data(), + src->Nbytes(), cudaMemcpyHostToDevice, + mat_batch->Stream()) == 0, + "[ERROR] Error occurs while copy memory from CPU to GPU."); + std::swap(mat_batch->input_cache, mat_batch->output_cache); + std::swap(mat_batch->input_cache->name, mat_batch->output_cache->name); + return mat_batch->input_cache; + } else { + FDASSERT(false, "FDMatBatch is on unsupported device: %d", src->device); + } +#else + FDASSERT(false, "UltraInfer didn't compile with WITH_GPU."); +#endif + return nullptr; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.h b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.h new file mode 100755 index 0000000000..0c11915786 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch.h @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/vision/common/processors/mat.h" + +#ifdef WITH_GPU +#include +#endif + +namespace ultrainfer { +namespace vision { + +enum FDMatBatchLayout { NHWC, NCHW }; + +/*! @brief FDMatBatch contains batch data for preprocess + */ +struct ULTRAINFER_DECL FDMatBatch { + FDMatBatch() = default; + + // MatBatch is intialized with a list of mats, + // the data is stored in the mats separately. + // Call Tensor() function to get a batched 4-dimension tensor. + explicit FDMatBatch(std::vector *_mats) { + mats = _mats; + layout = FDMatBatchLayout::NHWC; + mat_type = ProcLib::OPENCV; + } + + // Get the batched 4-dimension tensor. + FDTensor *Tensor(); + + void SetTensor(FDTensor *tensor); + +private: +#ifdef WITH_GPU + cudaStream_t stream = nullptr; +#endif + std::shared_ptr fd_tensor = std::make_shared(); + +public: + // When using CV-CUDA/CUDA, please set input/output cache, + // refer to manager.cc + FDTensor *input_cache; + FDTensor *output_cache; +#ifdef WITH_GPU + cudaStream_t Stream() const { return stream; } + void SetStream(cudaStream_t s); +#endif + + std::vector *mats = nullptr; + + // Used by pybind, since python cannot pass list as pointer or reference + std::vector mats_holder; + + ProcLib mat_type = ProcLib::OPENCV; + FDMatBatchLayout layout = FDMatBatchLayout::NHWC; + Device device = Device::CPU; + ProcLib proc_lib = ProcLib::DEFAULT; + + // False: the data is stored in the mats separately + // True: the data is stored in the fd_tensor continuously in 4 dimensions + bool has_batched_tensor = false; +}; + +// Create a batched input tensor on GPU and save into input_cache. +// If the MatBatch is on GPU, return the Tensor() directly. +// If the MatBatch is on CPU, then copy the CPU tensors to GPU and get a GPU +// batched input tensor. +FDTensor *CreateCachedGpuInputTensor(FDMatBatch *mat_batch); + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch_pybind.cc new file mode 100755 index 0000000000..447d07e83e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_batch_pybind.cc @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
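A sketch (illustrative, not part of the patch; the elided vector element types are assumed to be cv::Mat and FDMat) of building an FDMatBatch from several images and packing them into one batched tensor; Tensor() requires identically shaped inputs and an input cache to hold the packed result:

#include <vector>

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision/common/processors/mat_batch.h"

int main() {
  // Illustrative inputs; all images must share the same height/width/channels.
  std::vector<cv::Mat> images = {cv::imread("a.jpg"), cv::imread("b.jpg")};
  std::vector<ultrainfer::vision::FDMat> mats = ultrainfer::vision::WrapMat(images);

  ultrainfer::vision::FDMatBatch batch(&mats);
  ultrainfer::FDTensor cache;
  batch.input_cache = &cache;  // holds the packed NHWC data
  ultrainfer::FDTensor *batched = batch.Tensor();  // shape {N, H, W, C}
  (void)batched;
  return 0;
}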
+#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindFDMatBatch(pybind11::module &m) { + pybind11::class_(m, "FDMatBatch") + .def(pybind11::init<>(), "Default constructor") + .def_readwrite("input_cache", &vision::FDMatBatch::input_cache) + .def_readwrite("output_cache", &vision::FDMatBatch::output_cache) + .def_readwrite("mats", &vision::FDMatBatch::mats) + .def("from_mats", + [](vision::FDMatBatch &self, std::vector &_mats) { + self.mats_holder = _mats; + self.mats = &(self.mats_holder); + }); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/mat_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_pybind.cc new file mode 100755 index 0000000000..2bf591e1c7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/mat_pybind.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindFDMat(pybind11::module &m) { + pybind11::class_(m, "FDMat") + .def(pybind11::init<>(), "Default constructor") + .def_readwrite("input_cache", &vision::FDMat::input_cache) + .def_readwrite("output_cache", &vision::FDMat::output_cache) + .def("from_numpy", + [](vision::FDMat &self, pybind11::array &pyarray) { + self = vision::WrapMat(PyArrayToCvMat(pyarray)); + }) + .def("print_info", &vision::FDMat::PrintInfo); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cc new file mode 100755 index 0000000000..7f5cc49086 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/common/processors/normalize.h" + +namespace ultrainfer { +namespace vision { +Normalize::Normalize(const std::vector &mean, + const std::vector &std, bool is_scale, + const std::vector &min, + const std::vector &max, bool swap_rb) { + FDASSERT(mean.size() == std.size(), + "Normalize: requires the size of mean equal to the size of std."); + std::vector mean_(mean.begin(), mean.end()); + std::vector std_(std.begin(), std.end()); + std::vector min_(mean.size(), 0.0); + std::vector max_(mean.size(), 255.0); + if (min.size() != 0) { + FDASSERT( + min.size() == mean.size(), + "Normalize: while min is defined, requires the size of min equal to " + "the size of mean."); + min_.assign(min.begin(), min.end()); + } + if (max.size() != 0) { + FDASSERT( + min.size() == mean.size(), + "Normalize: while max is defined, requires the size of max equal to " + "the size of mean."); + max_.assign(max.begin(), max.end()); + } + for (auto c = 0; c < mean_.size(); ++c) { + double alpha = 1.0; + if (is_scale) { + alpha /= (max_[c] - min_[c]); + } + double beta = -1.0 * (mean_[c] + min_[c] * alpha) / std_[c]; + alpha /= std_[c]; + alpha_.push_back(alpha); + beta_.push_back(beta); + } + swap_rb_ = swap_rb; +} + +bool Normalize::ImplByOpenCV(Mat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + + std::vector split_im; + cv::split(*im, split_im); + if (swap_rb_) + std::swap(split_im[0], split_im[2]); + for (int c = 0; c < im->channels(); c++) { + split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); + } + cv::merge(split_im, *im); + return true; +} + +#ifdef ENABLE_FLYCV +bool Normalize::ImplByFlyCV(Mat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "Only supports 3-channels image in FlyCV, but now it's " + << im->channels() << "." << std::endl; + return false; + } + + std::vector mean(3, 0); + std::vector std(3, 0); + for (size_t i = 0; i < 3; ++i) { + std[i] = 1.0 / alpha_[i]; + mean[i] = -1 * beta_[i] * std[i]; + } + + std::vector channel_reorder_index = {0, 1, 2}; + if (swap_rb_) + std::swap(channel_reorder_index[0], channel_reorder_index[2]); + + fcv::Mat new_im(im->width(), im->height(), fcv::FCVImageType::PKG_BGR_F32); + fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index, + new_im, true); + mat->SetMat(new_im); + return true; +} +#endif + +bool Normalize::Run(Mat *mat, const std::vector &mean, + const std::vector &std, bool is_scale, + const std::vector &min, + const std::vector &max, ProcLib lib, bool swap_rb) { + auto n = Normalize(mean, std, is_scale, min, max, swap_rb); + return n(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cu b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cu new file mode 100755 index 0000000000..1f31689898 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.cu @@ -0,0 +1,117 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifdef WITH_GPU +#include "ultrainfer/vision/common/processors/normalize.h" + +namespace ultrainfer { +namespace vision { + +__global__ void NormalizeKernel(const uint8_t *src, float *dst, + const float *alpha, const float *beta, + int num_channel, bool swap_rb, int batch_size, + int edge) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + if (idx >= edge) + return; + + int img_size = edge / batch_size; + int n = idx / img_size; // batch index + int p = idx - (n * img_size); // pixel index within the image + + for (int i = 0; i < num_channel; ++i) { + int j = i; + if (swap_rb) { + j = 2 - i; + } + dst[num_channel * idx + j] = + src[num_channel * idx + j] * alpha[i] + beta[i]; + } +} + +bool Normalize::ImplByCuda(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "The input data must be NHWC format!" << std::endl; + return false; + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + src->ExpandDim(0); + FDMatBatch mat_batch; + mat_batch.SetTensor(src); + mat_batch.mat_type = ProcLib::CUDA; + mat_batch.input_cache = mat->input_cache; + mat_batch.output_cache = mat->output_cache; + + bool ret = ImplByCuda(&mat_batch); + + FDTensor *dst = mat_batch.Tensor(); + dst->Squeeze(0); + mat->SetTensor(dst); + mat->mat_type = ProcLib::CUDA; + return true; +} + +bool Normalize::ImplByCuda(FDMatBatch *mat_batch) { + if (mat_batch->layout != FDMatBatchLayout::NHWC) { + FDERROR << "The input data must be NHWC format!" << std::endl; + return false; + } + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat_batch); + + // Prepare output tensor + mat_batch->output_cache->Resize(src->Shape(), FDDataType::FP32, + "batch_output_cache", Device::GPU); + + // Copy alpha and beta to GPU + gpu_alpha_.Resize({1, 1, static_cast(alpha_.size())}, FDDataType::FP32, + "alpha", Device::GPU); + cudaMemcpy(gpu_alpha_.Data(), alpha_.data(), gpu_alpha_.Nbytes(), + cudaMemcpyHostToDevice); + + gpu_beta_.Resize({1, 1, static_cast(beta_.size())}, FDDataType::FP32, + "beta", Device::GPU); + cudaMemcpy(gpu_beta_.Data(), beta_.data(), gpu_beta_.Nbytes(), + cudaMemcpyHostToDevice); + + int jobs = + mat_batch->output_cache->Numel() / mat_batch->output_cache->shape[3]; + int threads = 256; + int blocks = ceil(jobs / (float)threads); + NormalizeKernel<<Stream()>>>( + reinterpret_cast(src->Data()), + reinterpret_cast(mat_batch->output_cache->Data()), + reinterpret_cast(gpu_alpha_.Data()), + reinterpret_cast(gpu_beta_.Data()), + mat_batch->output_cache->shape[3], swap_rb_, + mat_batch->output_cache->shape[0], jobs); + + mat_batch->SetTensor(mat_batch->output_cache); + mat_batch->mat_type = ProcLib::CUDA; + return true; +} + +#ifdef ENABLE_CVCUDA +bool Normalize::ImplByCvCuda(FDMat *mat) { return ImplByCuda(mat); } + +bool Normalize::ImplByCvCuda(FDMatBatch *mat_batch) { + return ImplByCuda(mat_batch); +} +#endif + +} // namespace vision +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.h b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.h new file mode 100755 index 0000000000..7625ef35aa --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize.h @@ -0,0 +1,90 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
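Both CUDA entry points above launch one thread per pixel position (jobs excludes the channel dimension) and size the grid with a ceiling division. A standalone restatement of that host-side arithmetic, with hypothetical shapes:

#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical NHWC batch: 8 x 224 x 224 x 3.
  const int n = 8, h = 224, w = 224, c = 3;
  const int numel = n * h * w * c;
  const int jobs = numel / c;  // one thread per (n, h, w) position
  const int threads = 256;
  const int blocks =
      static_cast<int>(std::ceil(jobs / static_cast<float>(threads)));
  std::printf("jobs=%d threads=%d blocks=%d\n", jobs, threads, blocks);
  return 0;
}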
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { +/*! @brief Processor for Normalize images with given paramters. + */ +class ULTRAINFER_DECL Normalize : public Processor { +public: + Normalize(const std::vector &mean, const std::vector &std, + bool is_scale = true, + const std::vector &min = std::vector(), + const std::vector &max = std::vector(), + bool swap_rb = false); + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef WITH_GPU + bool ImplByCuda(FDMat *mat); + bool ImplByCuda(FDMatBatch *mat_batch); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); + bool ImplByCvCuda(FDMatBatch *mat_batch); +#endif + std::string Name() { return "Normalize"; } + + // While use normalize, it is more recommend not use this function + // this function will need to compute result = ((mat / 255) - mean) / std + // if we use the following method + // ``` + // auto norm = Normalize(...) + // norm(mat) + // ``` + // There will be some precomputation in contruct function + // and the `norm(mat)` only need to compute result = mat * alpha + beta + // which will reduce lots of time + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] mean target mean vector of output images + * \param[in] std target std vector of output images + * \param[in] max max value vector to be in target image + * \param[in] min min value vector to be in target image + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \param[in] swap_rb to define whether to swap r and b channel order + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, const std::vector &mean, + const std::vector &std, bool is_scale = true, + const std::vector &min = std::vector(), + const std::vector &max = std::vector(), + ProcLib lib = ProcLib::DEFAULT, bool swap_rb = false); + + std::vector GetAlpha() const { return alpha_; } + std::vector GetBeta() const { return beta_; } + + bool GetSwapRB() { return swap_rb_; } + + /** \brief Process the input images + * + * \param[in] swap_rb set the value of the swap_rb parameter + */ + void SetSwapRB(bool swap_rb) { swap_rb_ = swap_rb; } + +private: + std::vector alpha_; + std::vector beta_; + FDTensor gpu_alpha_; + FDTensor gpu_beta_; + bool swap_rb_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cc new file mode 100755 index 0000000000..d724b38ac4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cc @@ -0,0 +1,124 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
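The comment block in normalize.h recommends constructing the processor once so the alpha/beta folding is paid a single time. A hedged usage sketch along those lines (assumes the ultrainfer vision headers and OpenCV are available, and that WrapMat lives in the processors' mat.h; the mean/std values are sample numbers, not from this patch):

// Not a standalone program; header path for WrapMat is an assumption.
#include "ultrainfer/vision/common/processors/mat.h"
#include "ultrainfer/vision/common/processors/normalize.h"
#include <opencv2/core.hpp>

void NormalizeOnce(cv::Mat &frame) {
  // Constructed once: alpha/beta are precomputed here.
  static ultrainfer::vision::Normalize norm({0.485f, 0.456f, 0.406f},
                                            {0.229f, 0.224f, 0.225f},
                                            /*is_scale=*/true);
  ultrainfer::vision::FDMat mat = ultrainfer::vision::WrapMat(frame);
  norm(&mat);  // per image: only result = mat * alpha + beta
}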
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/normalize_and_permute.h" + +namespace ultrainfer { +namespace vision { + +NormalizeAndPermute::NormalizeAndPermute(const std::vector &mean, + const std::vector &std, + bool is_scale, + const std::vector &min, + const std::vector &max, + bool swap_rb) { + FDASSERT(mean.size() == std.size(), + "Normalize: requires the size of mean equal to the size of std."); + std::vector mean_(mean.begin(), mean.end()); + std::vector std_(std.begin(), std.end()); + std::vector min_(mean.size(), 0.0); + std::vector max_(mean.size(), 255.0); + if (min.size() != 0) { + FDASSERT( + min.size() == mean.size(), + "Normalize: while min is defined, requires the size of min equal to " + "the size of mean."); + min_.assign(min.begin(), min.end()); + } + if (max.size() != 0) { + FDASSERT( + min.size() == mean.size(), + "Normalize: while max is defined, requires the size of max equal to " + "the size of mean."); + max_.assign(max.begin(), max.end()); + } + for (auto c = 0; c < mean_.size(); ++c) { + double alpha = 1.0; + if (is_scale) { + alpha /= (max_[c] - min_[c]); + } + double beta = -1.0 * (mean_[c] + min_[c] * alpha) / std_[c]; + alpha /= std_[c]; + alpha_.push_back(alpha); + beta_.push_back(beta); + } + swap_rb_ = swap_rb; +} + +bool NormalizeAndPermute::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + std::vector split_im; + cv::split(*im, split_im); + if (swap_rb_) + std::swap(split_im[0], split_im[2]); + for (int c = 0; c < im->channels(); c++) { + split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); + } + cv::Mat res(origin_h, origin_w, CV_32FC(im->channels())); + for (int i = 0; i < im->channels(); ++i) { + cv::extractChannel(split_im[i], + cv::Mat(origin_h, origin_w, CV_32FC1, + res.ptr() + i * origin_h * origin_w * 4), + 0); + } + mat->SetMat(res); + mat->layout = Layout::CHW; + return true; +} + +#ifdef ENABLE_FLYCV +bool NormalizeAndPermute::ImplByFlyCV(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Only supports input with HWC layout." << std::endl; + return false; + } + fcv::Mat *im = mat->GetFlyCVMat(); + if (im->channels() != 3) { + FDERROR << "Only supports 3-channels image in FlyCV, but now it's " + << im->channels() << "." 
<< std::endl; + return false; + } + std::vector mean(3, 0); + std::vector std(3, 0); + for (size_t i = 0; i < 3; ++i) { + std[i] = 1.0 / alpha_[i]; + mean[i] = -1 * beta_[i] * std[i]; + } + + std::vector channel_reorder_index = {0, 1, 2}; + if (swap_rb_) + std::swap(channel_reorder_index[0], channel_reorder_index[2]); + + fcv::Mat new_im; + fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index, + new_im, false); + mat->SetMat(new_im); + mat->layout = Layout::CHW; + return true; +} +#endif + +bool NormalizeAndPermute::Run(FDMat *mat, const std::vector &mean, + const std::vector &std, bool is_scale, + const std::vector &min, + const std::vector &max, ProcLib lib, + bool swap_rb) { + auto n = NormalizeAndPermute(mean, std, is_scale, min, max, swap_rb); + return n(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cu b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cu new file mode 100755 index 0000000000..177bb1c193 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.cu @@ -0,0 +1,134 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifdef WITH_GPU +#include "ultrainfer/vision/common/processors/normalize_and_permute.h" + +namespace ultrainfer { +namespace vision { + +__global__ void NormalizeAndPermuteKernel(const uint8_t *src, float *dst, + const float *alpha, const float *beta, + int num_channel, bool swap_rb, + int batch_size, int edge) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + if (idx >= edge) + return; + + int img_size = edge / batch_size; + int n = idx / img_size; // batch index + int p = idx - (n * img_size); // pixel index within the image + + for (int i = 0; i < num_channel; ++i) { + int j = i; + if (swap_rb) { + j = 2 - i; + } + dst[n * img_size * num_channel + i * img_size + p] = + src[num_channel * idx + j] * alpha[i] + beta[i]; + } +} + +bool NormalizeAndPermute::ImplByCuda(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Only supports input with HWC layout." 
<< std::endl; + return false; + } + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + + // Prepare output tensor + mat->output_cache->Resize({src->shape[2], src->shape[0], src->shape[1]}, + FDDataType::FP32, "output_cache", Device::GPU); + + // Copy alpha and beta to GPU + gpu_alpha_.Resize({1, 1, static_cast(alpha_.size())}, FDDataType::FP32, + "alpha", Device::GPU); + cudaMemcpy(gpu_alpha_.Data(), alpha_.data(), gpu_alpha_.Nbytes(), + cudaMemcpyHostToDevice); + + gpu_beta_.Resize({1, 1, static_cast(beta_.size())}, FDDataType::FP32, + "beta", Device::GPU); + cudaMemcpy(gpu_beta_.Data(), beta_.data(), gpu_beta_.Nbytes(), + cudaMemcpyHostToDevice); + + int jobs = 1 * mat->Width() * mat->Height(); + int threads = 256; + int blocks = ceil(jobs / (float)threads); + NormalizeAndPermuteKernel<<Stream()>>>( + reinterpret_cast(src->Data()), + reinterpret_cast(mat->output_cache->Data()), + reinterpret_cast(gpu_alpha_.Data()), + reinterpret_cast(gpu_beta_.Data()), mat->Channels(), swap_rb_, 1, + jobs); + + mat->layout = Layout::CHW; + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CUDA; + return true; +} + +bool NormalizeAndPermute::ImplByCuda(FDMatBatch *mat_batch) { + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat_batch); + + // Prepare output tensor + mat_batch->output_cache->Resize(src->Shape(), FDDataType::FP32, + "batch_output_cache", Device::GPU); + // NHWC -> NCHW + std::swap(mat_batch->output_cache->shape[1], + mat_batch->output_cache->shape[3]); + std::swap(mat_batch->output_cache->shape[2], + mat_batch->output_cache->shape[3]); + + // Copy alpha and beta to GPU + gpu_alpha_.Resize({1, 1, static_cast(alpha_.size())}, FDDataType::FP32, + "alpha", Device::GPU); + cudaMemcpy(gpu_alpha_.Data(), alpha_.data(), gpu_alpha_.Nbytes(), + cudaMemcpyHostToDevice); + + gpu_beta_.Resize({1, 1, static_cast(beta_.size())}, FDDataType::FP32, + "beta", Device::GPU); + cudaMemcpy(gpu_beta_.Data(), beta_.data(), gpu_beta_.Nbytes(), + cudaMemcpyHostToDevice); + + int jobs = + mat_batch->output_cache->Numel() / mat_batch->output_cache->shape[1]; + int threads = 256; + int blocks = ceil(jobs / (float)threads); + NormalizeAndPermuteKernel<<Stream()>>>( + reinterpret_cast(src->Data()), + reinterpret_cast(mat_batch->output_cache->Data()), + reinterpret_cast(gpu_alpha_.Data()), + reinterpret_cast(gpu_beta_.Data()), + mat_batch->output_cache->shape[1], swap_rb_, + mat_batch->output_cache->shape[0], jobs); + + mat_batch->SetTensor(mat_batch->output_cache); + mat_batch->layout = FDMatBatchLayout::NCHW; + mat_batch->mat_type = ProcLib::CUDA; + return true; +} + +#ifdef ENABLE_CVCUDA +bool NormalizeAndPermute::ImplByCvCuda(FDMat *mat) { return ImplByCuda(mat); } + +bool NormalizeAndPermute::ImplByCvCuda(FDMatBatch *mat_batch) { + return ImplByCuda(mat_batch); +} +#endif + +} // namespace vision +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.h b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.h new file mode 100755 index 0000000000..877749dcd0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute.h @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
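The NormalizeAndPermuteKernel above maps an NHWC source element at (batch n, pixel p, channel c) to the NCHW offset n*HW*C + c*HW + p while applying alpha/beta. A standalone CPU restatement of just the index mapping (the scaling is dropped to isolate the layout change), on a tiny hypothetical tensor:

#include <cstdio>
#include <vector>

int main() {
  const int n = 2, h = 2, w = 2, c = 3;  // hypothetical sizes
  const int img_size = h * w, edge = n * img_size;
  std::vector<float> src(edge * c), dst(edge * c);
  for (size_t i = 0; i < src.size(); ++i) src[i] = static_cast<float>(i);
  for (int idx = 0; idx < edge; ++idx) {  // one "thread" per pixel position
    int b = idx / img_size, p = idx - b * img_size;
    for (int ch = 0; ch < c; ++ch)
      dst[b * img_size * c + ch * img_size + p] = src[c * idx + ch];
  }
  // Channel-0 plane of batch 0 now holds src[0], src[3], src[6], src[9].
  std::printf("dst[0..5] = %g %g %g %g %g %g\n", dst[0], dst[1], dst[2],
              dst[3], dst[4], dst[5]);
  return 0;
}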
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { +/*! @brief Processor for Normalize and Permute images from HWC to CHW. + */ +class ULTRAINFER_DECL NormalizeAndPermute : public Processor { +public: + NormalizeAndPermute(const std::vector &mean, + const std::vector &std, bool is_scale = true, + const std::vector &min = std::vector(), + const std::vector &max = std::vector(), + bool swap_rb = false); + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif +#ifdef WITH_GPU + bool ImplByCuda(FDMat *mat); + bool ImplByCuda(FDMatBatch *mat_batch); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); + bool ImplByCvCuda(FDMatBatch *mat_batch); +#endif + std::string Name() { return "NormalizeAndPermute"; } + + // While use normalize, it is more recommend not use this function + // this function will need to compute result = ((mat / 255) - mean) / std + // if we use the following method + // ``` + // auto norm = Normalize(...) + // norm(mat) + // ``` + // There will be some precomputation in contruct function + // and the `norm(mat)` only need to compute result = mat * alpha + beta + // which will reduce lots of time + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] mean target mean vector of output images + * \param[in] std target std vector of output images + * \param[in] max max value vector to be in target image + * \param[in] min min value vector to be in target image + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. 
+ * \param[in] swap_rb to define whether to swap r and b channel order + * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, const std::vector &mean, + const std::vector &std, bool is_scale = true, + const std::vector &min = std::vector(), + const std::vector &max = std::vector(), + ProcLib lib = ProcLib::DEFAULT, bool swap_rb = false); + + /** \brief Process the input images + * + * \param[in] alpha set the value of the alpha parameter + */ + void SetAlpha(const std::vector &alpha) { + alpha_.clear(); + std::vector().swap(alpha_); + alpha_.assign(alpha.begin(), alpha.end()); + } + + /** \brief Process the input images + * + * \param[in] beta set the value of the beta parameter + */ + void SetBeta(const std::vector &beta) { + beta_.clear(); + std::vector().swap(beta_); + beta_.assign(beta.begin(), beta.end()); + } + + bool GetSwapRB() { return swap_rb_; } + + /** \brief Process the input images + * + * \param[in] swap_rb set the value of the swap_rb parameter + */ + void SetSwapRB(bool swap_rb) { swap_rb_ = swap_rb; } + +private: + std::vector alpha_; + std::vector beta_; + FDTensor gpu_alpha_; + FDTensor gpu_beta_; + bool swap_rb_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute_pybind.cc new file mode 100755 index 0000000000..b8e4b8f503 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_and_permute_pybind.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindNormalizeAndPermute(pybind11::module &m) { + pybind11::class_( + m, "NormalizeAndPermute") + .def(pybind11::init, std::vector, bool, + std::vector, std::vector, bool>(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_pybind.cc new file mode 100755 index 0000000000..489ba7322e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/normalize_pybind.cc @@ -0,0 +1,24 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindNormalize(pybind11::module &m) { + pybind11::class_(m, "Normalize") + .def(pybind11::init, std::vector, bool, + std::vector, std::vector, bool>(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/pad.cc new file mode 100755 index 0000000000..4ca38c4382 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad.cc @@ -0,0 +1,152 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/pad.h" + +namespace ultrainfer { +namespace vision { + +bool Pad::ImplByOpenCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "Pad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR << "Pad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() + << ", the size of padding values = " << value_.size() << "." + << std::endl; + return false; + } + cv::Mat *im = mat->GetOpenCVMat(); + cv::Scalar value; + if (value_.size() == 1) { + value = cv::Scalar(value_[0]); + } else if (value_.size() == 2) { + value = cv::Scalar(value_[0], value_[1]); + } else if (value_.size() == 3) { + value = cv::Scalar(value_[0], value_[1], value_[2]); + } else { + value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]); + } + cv::copyMakeBorder(*im, *im, top_, bottom_, left_, right_, + cv::BORDER_CONSTANT, value); + mat->SetHeight(im->rows); + mat->SetWidth(im->cols); + return true; +} + +#ifdef ENABLE_FLYCV +bool Pad::ImplByFlyCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "Pad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR << "Pad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() + << ", the size of padding values = " << value_.size() << "." 
+ << std::endl; + return false; + } + fcv::Mat *im = mat->GetFlyCVMat(); + fcv::Scalar value; + if (value_.size() == 1) { + value = fcv::Scalar(value_[0]); + } else if (value_.size() == 2) { + value = fcv::Scalar(value_[0], value_[1]); + } else if (value_.size() == 3) { + value = fcv::Scalar(value_[0], value_[1], value_[2]); + } else { + value = fcv::Scalar(value_[0], value_[1], value_[2], value_[3]); + } + fcv::Mat new_im; + fcv::copy_make_border(*im, new_im, top_, bottom_, left_, right_, + fcv::BorderType::BORDER_CONSTANT, value); + mat->SetMat(new_im); + mat->SetHeight(new_im.height()); + mat->SetWidth(new_im.width()); + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool Pad::ImplByCvCuda(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "Pad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR << "Pad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() + << ", the size of padding values = " << value_.size() << "." + << std::endl; + return false; + } + + float4 value; + if (value_.size() == 1) { + value = make_float4(value_[0], 0.0f, 0.0f, 0.0f); + } else if (value_.size() == 2) { + value = make_float4(value_[0], value_[1], 0.0f, 0.0f); + } else if (value_.size() == 3) { + value = make_float4(value_[0], value_[1], value_[2], 0.0f); + } else { + value = make_float4(value_[0], value_[1], value_[2], value_[3]); + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + int height = mat->Height() + top_ + bottom_; + int width = mat->Width() + left_ + right_; + + // Prepare output tensor + mat->output_cache->Resize({height, width, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + cvcuda_pad_op_(mat->Stream(), *src_tensor, *dst_tensor, top_, left_, + NVCV_BORDER_CONSTANT, value); + + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool Pad::Run(Mat *mat, const int &top, const int &bottom, const int &left, + const int &right, const std::vector &value, ProcLib lib) { + auto p = Pad(top, bottom, left, right, value); + return p(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad.h b/libs/ultrainfer/ultrainfer/vision/common/processors/pad.h new file mode 100755 index 0000000000..5984a58858 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad.h @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for padding images. + */ +class ULTRAINFER_DECL Pad : public Processor { +public: + Pad(int top, int bottom, int left, int right, + const std::vector &value) { + top_ = top; + bottom_ = bottom; + left_ = left; + right_ = right; + value_ = value; + } + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "Pad"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] top top pad size of the output image. + * \param[in] bottom bottom pad size of the output image. + * \param[in] left left pad size of the output image. + * \param[in] right right pad size of the output image. + * \param[in] value value vector used by padding of the output image. + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, const int &top, const int &bottom, const int &left, + const int &right, const std::vector &value, + ProcLib lib = ProcLib::DEFAULT); + + /** \brief Process the input images + * + * \param[in] top set the value of the top parameter + * \param[in] bottom set the value of the bottom parameter + * \param[in] left set the value of the left parameter + * \param[in] right set the value of the right parameter + */ + bool SetPaddingSize(int top, int bottom, int left, int right) { + top_ = top; + bottom_ = bottom; + left_ = left; + right_ = right; + return true; + } + +private: + int top_; + int bottom_; + int left_; + int right_; + std::vector value_; +#ifdef ENABLE_CVCUDA + cvcuda::CopyMakeBorder cvcuda_pad_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_pybind.cc new file mode 100755 index 0000000000..a9886872bb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_pybind.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPad(pybind11::module &m) { + pybind11::class_(m, "Pad").def( + pybind11::init>(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.cc new file mode 100755 index 0000000000..25c36b55a6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.cc @@ -0,0 +1,272 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
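Pad::ImplByOpenCV above delegates to cv::copyMakeBorder with a constant border whose scalar must supply one component per channel. A standalone OpenCV sketch with hypothetical sizes, showing the resulting dimensions:

#include <cstdio>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

int main() {
  cv::Mat im(4, 6, CV_8UC3, cv::Scalar(10, 20, 30));  // hypothetical 6x4 image
  cv::Mat padded;
  cv::copyMakeBorder(im, padded, /*top=*/2, /*bottom=*/2, /*left=*/1,
                     /*right=*/1, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
  std::printf("padded: %dx%d\n", padded.cols, padded.rows);  // 8x8
  return 0;
}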
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/pad_to_size.h" + +#include "ultrainfer/vision/common/processors/utils.h" + +namespace ultrainfer { +namespace vision { + +static bool PadHWCByOpenCV(FDMat *mat, int width, int height, + const std::vector &value) { + int origin_w = mat->Width(); + int origin_h = mat->Height(); + cv::Mat *im = mat->GetOpenCVMat(); + cv::Scalar scalar; + if (value.size() == 1) { + scalar = cv::Scalar(value[0]); + } else if (value.size() == 2) { + scalar = cv::Scalar(value[0], value[1]); + } else if (value.size() == 3) { + scalar = cv::Scalar(value[0], value[1], value[2]); + } else { + scalar = cv::Scalar(value[0], value[1], value[2], value[3]); + } + // top, bottom, left, right + cv::copyMakeBorder(*im, *im, 0, height - origin_h, 0, width - origin_w, + cv::BORDER_CONSTANT, scalar); + mat->SetHeight(height); + mat->SetWidth(width); + return true; +} + +static bool PadCHWByOpenCV(FDMat *mat, int width, int height, + const std::vector &value) { + int origin_w = mat->Width(); + int origin_h = mat->Height(); + cv::Mat *im = mat->GetOpenCVMat(); + cv::Mat new_im(height, width, + CreateOpenCVDataType(mat->Type(), mat->Channels())); + + for (int i = 0; i < mat->Channels(); ++i) { + uint8_t *src_data = + im->ptr() + i * origin_w * origin_h * FDDataTypeSize(mat->Type()); + cv::Mat src(origin_h, origin_w, CreateOpenCVDataType(mat->Type(), 1), + src_data); + + uint8_t *dst_data = + new_im.ptr() + i * width * height * FDDataTypeSize(mat->Type()); + cv::Mat dst(height, width, CreateOpenCVDataType(mat->Type(), 1), dst_data); + + cv::copyMakeBorder(src, dst, 0, height - origin_h, 0, width - origin_w, + cv::BORDER_CONSTANT, cv::Scalar(value[i])); + } + mat->SetMat(new_im); + mat->SetHeight(height); + mat->SetWidth(width); + return true; +} + +bool PadToSize::CheckArgs(FDMat *mat) { + if (mat->Channels() > 4) { + FDERROR << "PadToSize: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR + << "PadToSize: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() << ", the size of padding values = " << value_.size() + << "." << std::endl; + return false; + } + if (mat->Width() > width_) { + FDERROR << "PadToSize: the input width:" << mat->Width() + << " is greater than the target width: " << width_ << "." + << std::endl; + return false; + } + if (mat->Height() > height_) { + FDERROR << "PadToSize: the input height:" << mat->Height() + << " is greater than the target height: " << height_ << "." 
+ << std::endl; + return false; + } + return true; +} + +bool PadToSize::ImplByOpenCV(FDMat *mat) { + if (width_ == -1 || height_ == -1 || + (mat->Width() == width_ && mat->Height() == height_)) { + return true; + } + if (CheckArgs(mat) == false) { + return false; + } + if (mat->layout == Layout::HWC) { + return PadHWCByOpenCV(mat, width_, height_, value_); + } else if (mat->layout == Layout::CHW) { + return PadCHWByOpenCV(mat, width_, height_, value_); + } + return false; +} + +#ifdef ENABLE_FLYCV +static bool PadHWCByFlyCV(FDMat *mat, int width, int height, + const std::vector &value) { + int origin_w = mat->Width(); + int origin_h = mat->Height(); + fcv::Mat *im = mat->GetFlyCVMat(); + fcv::Scalar scalar; + if (value.size() == 1) { + scalar = fcv::Scalar(value[0]); + } else if (value.size() == 2) { + scalar = fcv::Scalar(value[0], value[1]); + } else if (value.size() == 3) { + scalar = fcv::Scalar(value[0], value[1], value[2]); + } else { + scalar = fcv::Scalar(value[0], value[1], value[2], value[3]); + } + fcv::Mat new_im; + // top, bottom, left, right + fcv::copy_make_border(*im, new_im, 0, height - origin_h, 0, width - origin_w, + fcv::BorderType::BORDER_CONSTANT, scalar); + mat->SetMat(new_im); + mat->SetHeight(height); + mat->SetWidth(width); + return true; +} + +static bool PadCHWByFlyCV(FDMat *mat, int width, int height, + const std::vector &value) { + int origin_w = mat->Width(); + int origin_h = mat->Height(); + fcv::Mat new_im(height, width, + CreateFlyCVDataType(mat->Type(), mat->Channels())); + for (int i = 0; i < mat->Channels(); ++i) { + uint8_t *src_data = reinterpret_cast(mat->Data()) + + i * origin_w * origin_h * FDDataTypeSize(mat->Type()); + fcv::Mat src(origin_h, origin_w, CreateFlyCVDataType(mat->Type(), 1), + src_data); + + uint8_t *dst_data = reinterpret_cast(new_im.data()) + + i * width * height * FDDataTypeSize(mat->Type()); + fcv::Mat dst(height, width, CreateFlyCVDataType(mat->Type(), 1), dst_data); + + fcv::copy_make_border(src, dst, 0, height - origin_h, 0, width - origin_w, + fcv::BorderType::BORDER_CONSTANT, + fcv::Scalar(value[i])); + } + mat->SetMat(new_im); + mat->SetHeight(height); + mat->SetWidth(width); + return true; +} + +bool PadToSize::ImplByFlyCV(FDMat *mat) { + if (width_ == -1 || height_ == -1 || + (mat->Width() == width_ && mat->Height() == height_)) { + return true; + } + if (CheckArgs(mat) == false) { + return false; + } + if (mat->layout == Layout::HWC) { + return PadHWCByFlyCV(mat, width_, height_, value_); + } else if (mat->layout == Layout::CHW) { + return PadCHWByFlyCV(mat, width_, height_, value_); + } + return false; +} +#endif + +#ifdef ENABLE_CVCUDA +static bool PadHWCByCvCuda(cvcuda::CopyMakeBorder &pad_op, FDMat *mat, + int width, int height, + const std::vector &value) { + float4 border_value; + if (value.size() == 1) { + border_value = make_float4(value[0], 0.0f, 0.0f, 0.0f); + } else if (value.size() == 2) { + border_value = make_float4(value[0], value[1], 0.0f, 0.0f); + } else if (value.size() == 3) { + border_value = make_float4(value[0], value[1], value[2], 0.0f); + } else { + border_value = make_float4(value[0], value[1], value[2], value[3]); + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + // Prepare output tensor + mat->output_cache->Resize({height, width, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + pad_op(mat->Stream(), *src_tensor, 
*dst_tensor, 0, 0, NVCV_BORDER_CONSTANT, + border_value); + + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} + +static bool PadCHWByCvCuda(cvcuda::CopyMakeBorder &pad_op, FDMat *mat, + int width, int height, + const std::vector &value) { + float4 border_value = make_float4(value[0], 0.0f, 0.0f, 0.0f); + FDTensor *input = CreateCachedGpuInputTensor(mat); + int channels = input->shape[0]; + mat->output_cache->Resize({channels, height, width}, mat->Type(), + "output_cache", Device::GPU); + for (int i = 0; i < channels; ++i) { + uint8_t *src_data = + reinterpret_cast(input->Data()) + + i * mat->Width() * mat->Height() * FDDataTypeSize(mat->Type()); + FDTensor src; + src.SetExternalData({mat->Height(), mat->Width(), 1}, input->Dtype(), + src_data, input->device, input->device_id); + auto src_tensor = CreateCvCudaTensorWrapData(src); + + uint8_t *dst_data = reinterpret_cast(mat->output_cache->Data()) + + i * width * height * FDDataTypeSize(mat->Type()); + FDTensor dst; + dst.SetExternalData({height, width, 1}, input->Dtype(), dst_data, + input->device, input->device_id); + auto dst_tensor = CreateCvCudaTensorWrapData(dst); + + pad_op(mat->Stream(), (*src_tensor), (*dst_tensor), 0, 0, + NVCV_BORDER_CONSTANT, border_value); + } + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} +bool PadToSize::ImplByCvCuda(FDMat *mat) { + if (width_ == -1 || height_ == -1 || + (mat->Width() == width_ && mat->Height() == height_)) { + return true; + } + if (CheckArgs(mat) == false) { + return false; + } + if (mat->layout == Layout::HWC) { + return PadHWCByCvCuda(cvcuda_pad_op_, mat, width_, height_, value_); + } else if (mat->layout == Layout::CHW) { + return PadCHWByCvCuda(cvcuda_pad_op_, mat, width_, height_, value_); + } + return false; +} +#endif + +bool PadToSize::Run(Mat *mat, int width, int height, + const std::vector &value, ProcLib lib) { + auto p = PadToSize(width, height, value); + return p(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.h b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.h new file mode 100755 index 0000000000..1c7e49627a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size.h @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for padding images to given size. 
+ */ +class ULTRAINFER_DECL PadToSize : public Processor { +public: + // only support pad with right-bottom padding mode + PadToSize(int width, int height, const std::vector &value) { + width_ = width; + height_ = height; + value_ = value; + } + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "PadToSize"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] width width of the output image. + * \param[in] height height of the output image. + * \param[in] value value vector used by padding of the output image. + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, int width, int height, + const std::vector &value, + ProcLib lib = ProcLib::DEFAULT); + + /** \brief Process the input images + * + * \param[in] width set the value of the width parameter + * \param[in] height set the value of the height parameter + */ + void SetWidthHeight(int width, int height) { + width_ = width; + height_ = height; + } + +private: + bool CheckArgs(FDMat *mat); + int width_; + int height_; + std::vector value_; +#ifdef ENABLE_CVCUDA + cvcuda::CopyMakeBorder cvcuda_pad_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size_pybind.cc new file mode 100755 index 0000000000..3ea2694434 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/pad_to_size_pybind.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPadToSize(pybind11::module &m) { + pybind11::class_(m, "PadToSize") + .def(pybind11::init>(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.cc new file mode 100755 index 0000000000..017b2dd751 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
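PadToSize only supports the right-bottom padding mode noted in its class comment, so the implied border widths are simply target minus origin on the bottom and right, with zero padding on the top and left. A standalone restatement with hypothetical sizes:

#include <cstdio>

int main() {
  // Hypothetical: a 640x480 (w x h) image padded to a 704x512 target.
  const int origin_w = 640, origin_h = 480;
  const int target_w = 704, target_h = 512;
  const int top = 0, left = 0;             // right-bottom mode only
  const int bottom = target_h - origin_h;  // 32
  const int right = target_w - origin_w;   // 64
  std::printf("pad top=%d bottom=%d left=%d right=%d\n", top, bottom, left,
              right);
  return 0;
}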
+ +#include "ultrainfer/vision/common/processors/proc_lib.h" + +namespace ultrainfer { +namespace vision { + +ProcLib DefaultProcLib::default_lib = ProcLib::DEFAULT; + +std::ostream &operator<<(std::ostream &out, const ProcLib &p) { + switch (p) { + case ProcLib::DEFAULT: + out << "ProcLib::DEFAULT"; + break; + case ProcLib::OPENCV: + out << "ProcLib::OPENCV"; + break; + case ProcLib::FLYCV: + out << "ProcLib::FLYCV"; + break; + case ProcLib::CUDA: + out << "ProcLib::CUDA"; + break; + case ProcLib::CVCUDA: + out << "ProcLib::CVCUDA"; + break; + default: + FDASSERT(false, "Unknow type of ProcLib."); + } + return out; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.h b/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.h new file mode 100755 index 0000000000..5ec49687d6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/proc_lib.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace vision { + +enum class ULTRAINFER_DECL ProcLib { DEFAULT, OPENCV, FLYCV, CUDA, CVCUDA }; + +ULTRAINFER_DECL std::ostream &operator<<(std::ostream &out, const ProcLib &p); + +struct ULTRAINFER_DECL DefaultProcLib { + // default_lib has the highest priority + // all the function in `processor` will force to use + // default_lib if this flag is set. + // DEFAULT means this flag is not set + static ProcLib default_lib; +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/processors_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/processors_pybind.cc new file mode 100755 index 0000000000..28f1e7570d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/processors_pybind.cc @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindProcessorManager(pybind11::module &m); +void BindNormalizeAndPermute(pybind11::module &m); +void BindProcessor(pybind11::module &m); +void BindResizeByShort(pybind11::module &m); +void BindCenterCrop(pybind11::module &m); +void BindPad(pybind11::module &m); +void BindCast(pybind11::module &m); +void BindHWC2CHW(pybind11::module &m); +void BindNormalize(pybind11::module &m); +void BindPadToSize(pybind11::module &m); +void BindResize(pybind11::module &m); +void BindStridePad(pybind11::module &m); + +void BindProcessors(pybind11::module &m) { + auto processors_m = + m.def_submodule("processors", "Module to deploy Processors models"); + BindProcessorManager(processors_m); + BindProcessor(processors_m); + BindNormalizeAndPermute(processors_m); + BindResizeByShort(processors_m); + BindCenterCrop(processors_m); + BindPad(processors_m); + BindCast(processors_m); + BindHWC2CHW(processors_m); + BindNormalize(processors_m); + BindPadToSize(processors_m); + BindResize(processors_m); + BindStridePad(processors_m); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/resize.cc new file mode 100755 index 0000000000..c4605ab421 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize.cc @@ -0,0 +1,171 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/resize.h" + +namespace ultrainfer { +namespace vision { + +bool Resize::ImplByOpenCV(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Resize: The format of input is not HWC." << std::endl; + return false; + } + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + + if (width_ == origin_w && height_ == origin_h) { + return true; + } + if (fabs(scale_w_ - 1.0) < 1e-06 && fabs(scale_h_ - 1.0) < 1e-06) { + return true; + } + + if (width_ > 0 && height_ > 0) { + if (use_scale_) { + float scale_w = width_ * 1.0 / origin_w; + float scale_h = height_ * 1.0 / origin_h; + cv::resize(*im, *im, cv::Size(0, 0), scale_w, scale_h, interp_); + } else { + cv::resize(*im, *im, cv::Size(width_, height_), 0, 0, interp_); + } + } else if (scale_w_ > 0 && scale_h_ > 0) { + cv::resize(*im, *im, cv::Size(0, 0), scale_w_, scale_h_, interp_); + } else { + FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) " + "or (scale_w > 0 && scale_h > 0)." + << std::endl; + return false; + } + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + return true; +} + +#ifdef ENABLE_FLYCV +bool Resize::ImplByFlyCV(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Resize: The format of input is not HWC." 
<< std::endl; + return false; + } + fcv::Mat *im = mat->GetFlyCVMat(); + int origin_w = im->width(); + int origin_h = im->height(); + + if (width_ == origin_w && height_ == origin_h) { + return true; + } + if (fabs(scale_w_ - 1.0) < 1e-06 && fabs(scale_h_ - 1.0) < 1e-06) { + return true; + } + + auto interp_method = fcv::InterpolationType::INTER_LINEAR; + if (interp_ == 0) { + interp_method = fcv::InterpolationType::INTER_NEAREST; + } else if (interp_ == 1) { + interp_method = fcv::InterpolationType::INTER_LINEAR; + } else if (interp_ == 2) { + interp_method = fcv::InterpolationType::INTER_CUBIC; + } else if (interp_ == 3) { + interp_method = fcv::InterpolationType::INTER_AREA; + } else { + FDERROR << "Resize: Only support interp_ be 0/1/2/3 with FlyCV, but " + "now it's " + << interp_ << "." << std::endl; + return false; + } + + if (width_ > 0 && height_ > 0) { + fcv::Mat new_im; + if (use_scale_) { + float scale_w = width_ * 1.0 / origin_w; + float scale_h = height_ * 1.0 / origin_h; + fcv::resize(*im, new_im, fcv::Size(), scale_w, scale_h, interp_method); + } else { + fcv::resize(*im, new_im, fcv::Size(width_, height_), 0, 0, interp_method); + } + mat->SetMat(new_im); + mat->SetWidth(new_im.width()); + mat->SetHeight(new_im.height()); + } else if (scale_w_ > 0 && scale_h_ > 0) { + fcv::Mat new_im; + fcv::resize(*im, new_im, fcv::Size(0, 0), scale_w_, scale_h_, + interp_method); + mat->SetMat(new_im); + mat->SetWidth(new_im.width()); + mat->SetHeight(new_im.height()); + } else { + FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) " + "or (scale_w > 0 && scale_h > 0)." + << std::endl; + return false; + } + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool Resize::ImplByCvCuda(FDMat *mat) { + if (width_ == mat->Width() && height_ == mat->Height()) { + return true; + } + if (fabs(scale_w_ - 1.0) < 1e-06 && fabs(scale_h_ - 1.0) < 1e-06) { + return true; + } + + if (width_ > 0 && height_ > 0) { + } else if (scale_w_ > 0 && scale_h_ > 0) { + width_ = std::round(scale_w_ * mat->Width()); + height_ = std::round(scale_h_ * mat->Height()); + } else { + FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) " + "or (scale_w > 0 && scale_h > 0)." 
+ << std::endl; + return false; + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + // Prepare output tensor + mat->output_cache->Resize({height_, width_, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + // CV-CUDA Interp value is compatible with OpenCV + cvcuda_resize_op_(mat->Stream(), *src_tensor, *dst_tensor, + CreateCvCudaInterp(interp_)); + + mat->SetTensor(mat->output_cache); + mat->SetWidth(width_); + mat->SetHeight(height_); + mat->device = Device::GPU; + mat->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool Resize::Run(FDMat *mat, int width, int height, float scale_w, + float scale_h, int interp, bool use_scale, ProcLib lib) { + if (mat->Height() == height && mat->Width() == width) { + return true; + } + auto r = Resize(width, height, scale_w, scale_h, interp, use_scale); + return r(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize.h b/libs/ultrainfer/ultrainfer/vision/common/processors/resize.h new file mode 100755 index 0000000000..ee3e0dc31d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize.h @@ -0,0 +1,93 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for Resize images. + */ +class ULTRAINFER_DECL Resize : public Processor { +public: + Resize(int width, int height, float scale_w = -1.0, float scale_h = -1.0, + int interp = 1, bool use_scale = false) { + width_ = width; + height_ = height; + scale_w_ = scale_w; + scale_h_ = scale_h; + interp_ = interp; + use_scale_ = use_scale; + } + + bool ImplByOpenCV(FDMat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "Resize"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] width width of the output image. + * \param[in] height height of the output image. + * \param[in] scale_w scale of width, deafult is -1.0. + * \param[in] scale_h scale of height, deafult is -1.0. + * \param[in] interp interpolation method, deafult is 1. + * \param[in] use_scale to define wheather to scale the image, deafult is + * true. \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. 
+ * \return true if the process successed, otherwise false + */ + static bool Run(FDMat *mat, int width, int height, float scale_w = -1.0, + float scale_h = -1.0, int interp = 1, bool use_scale = false, + ProcLib lib = ProcLib::DEFAULT); + + /** \brief Process the input images + * + * \param[in] width set the value of the width parameter + * \param[in] height set the value of the height parameter + */ + bool SetWidthAndHeight(int width, int height) { + width_ = width; + height_ = height; + return true; + } + + std::tuple GetWidthAndHeight() { + return std::make_tuple(width_, height_); + } + +private: + int width_; + int height_; + float scale_w_ = -1.0; + float scale_h_ = -1.0; + int interp_ = 1; + bool use_scale_ = false; +#ifdef ENABLE_CVCUDA + cvcuda::Resize cvcuda_resize_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.cc new file mode 100755 index 0000000000..91b568ae96 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.cc @@ -0,0 +1,188 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/resize_by_short.h" + +namespace ultrainfer { +namespace vision { + +bool ResizeByShort::ImplByOpenCV(FDMat *mat) { + cv::Mat *im = mat->GetOpenCVMat(); + int origin_w = im->cols; + int origin_h = im->rows; + double scale = GenerateScale(origin_w, origin_h); + if (use_scale_ && fabs(scale - 1.0) >= 1e-06) { + cv::resize(*im, *im, cv::Size(), scale, scale, interp_); + } else { + int width = static_cast(round(scale * im->cols)); + int height = static_cast(round(scale * im->rows)); + if (width != origin_w || height != origin_h) { + cv::resize(*im, *im, cv::Size(width, height), 0, 0, interp_); + } + } + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + return true; +} + +#ifdef ENABLE_FLYCV +bool ResizeByShort::ImplByFlyCV(FDMat *mat) { + fcv::Mat *im = mat->GetFlyCVMat(); + int origin_w = im->width(); + int origin_h = im->height(); + double scale = GenerateScale(origin_w, origin_h); + + auto interp_method = fcv::InterpolationType::INTER_LINEAR; + if (interp_ == 0) { + interp_method = fcv::InterpolationType::INTER_NEAREST; + } else if (interp_ == 1) { + interp_method = fcv::InterpolationType::INTER_LINEAR; + } else if (interp_ == 2) { + interp_method = fcv::InterpolationType::INTER_CUBIC; + } else if (interp_ == 3) { + interp_method = fcv::InterpolationType::INTER_AREA; + } else { + FDERROR << "LimitByShort: Only support interp_ be 0/1/2/3 with FlyCV, but " + "now it's " + << interp_ << "." 
<< std::endl; + return false; + } + + if (use_scale_ && fabs(scale - 1.0) >= 1e-06) { + fcv::Mat new_im; + fcv::resize(*im, new_im, fcv::Size(), scale, scale, interp_method); + mat->SetMat(new_im); + mat->SetHeight(new_im.height()); + mat->SetWidth(new_im.width()); + } else { + int width = static_cast(round(scale * im->width())); + int height = static_cast(round(scale * im->height())); + if (width != origin_w || height != origin_h) { + fcv::Mat new_im; + fcv::resize(*im, new_im, fcv::Size(width, height), 0, 0, interp_method); + mat->SetMat(new_im); + mat->SetHeight(new_im.height()); + mat->SetWidth(new_im.width()); + } + } + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool ResizeByShort::ImplByCvCuda(FDMat *mat) { + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + double scale = GenerateScale(mat->Width(), mat->Height()); + int width = static_cast(round(scale * mat->Width())); + int height = static_cast(round(scale * mat->Height())); + + // Prepare output tensor + mat->output_cache->Resize({height, width, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + cvcuda_resize_op_(mat->Stream(), *src_tensor, *dst_tensor, + CreateCvCudaInterp(interp_)); + + mat->SetTensor(mat->output_cache); + mat->SetWidth(width); + mat->SetHeight(height); + mat->device = Device::GPU; + mat->mat_type = ProcLib::CVCUDA; + return true; +} + +bool ResizeByShort::ImplByCvCuda(FDMatBatch *mat_batch) { + // TODO(wangxinyu): to support batched tensor as input + FDASSERT(mat_batch->has_batched_tensor == false, + "ResizeByShort doesn't support batched tensor as input for now."); + // Prepare input batch + std::string tensor_name = Name() + "_cvcuda_src"; + std::vector src_tensors; + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + FDTensor *src = CreateCachedGpuInputTensor(&(*(mat_batch->mats))[i]); + src_tensors.push_back(src); + } + nvcv::ImageBatchVarShape src_batch(mat_batch->mats->size()); + CreateCvCudaImageBatchVarShape(src_tensors, src_batch); + + // Prepare output batch + tensor_name = Name() + "_cvcuda_dst"; + std::vector dst_tensors; + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + FDMat *mat = &(*(mat_batch->mats))[i]; + double scale = GenerateScale(mat->Width(), mat->Height()); + int width = static_cast(round(scale * mat->Width())); + int height = static_cast(round(scale * mat->Height())); + mat->output_cache->Resize({height, width, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + dst_tensors.push_back(mat->output_cache); + } + nvcv::ImageBatchVarShape dst_batch(mat_batch->mats->size()); + CreateCvCudaImageBatchVarShape(dst_tensors, dst_batch); + + // CV-CUDA Interp value is compatible with OpenCV + cvcuda_resize_op_(mat_batch->Stream(), src_batch, dst_batch, + CreateCvCudaInterp(interp_)); + + for (size_t i = 0; i < mat_batch->mats->size(); ++i) { + FDMat *mat = &(*(mat_batch->mats))[i]; + mat->SetTensor(dst_tensors[i]); + mat->SetWidth(dst_tensors[i]->Shape()[1]); + mat->SetHeight(dst_tensors[i]->Shape()[0]); + mat->device = Device::GPU; + mat->mat_type = ProcLib::CVCUDA; + } + mat_batch->device = Device::GPU; + mat_batch->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +double ResizeByShort::GenerateScale(const int origin_w, const int origin_h) { + int im_size_max = std::max(origin_w, origin_h); + int im_size_min = std::min(origin_w, origin_h); + double scale = + static_cast(target_size_) / 
static_cast<double>(im_size_min);
+
+  if (max_hw_.size() > 0) {
+    FDASSERT(max_hw_.size() == 2,
+             "Require size of max_hw_ be 2, but now it's %zu.", max_hw_.size());
+    FDASSERT(
+        max_hw_[0] > 0 && max_hw_[1] > 0,
+        "Require elements in max_hw_ greater than 0, but now it's [%d, %d].",
+        max_hw_[0], max_hw_[1]);
+
+    double scale_h =
+        static_cast<double>(max_hw_[0]) / static_cast<double>(origin_h);
+    double scale_w =
+        static_cast<double>(max_hw_[1]) / static_cast<double>(origin_w);
+    double min_scale = std::min(scale_h, scale_w);
+    if (min_scale < scale) {
+      scale = min_scale;
+    }
+  }
+  return scale;
+}
+
+bool ResizeByShort::Run(FDMat *mat, int target_size, int interp, bool use_scale,
+                        const std::vector<int> &max_hw, ProcLib lib) {
+  auto r = ResizeByShort(target_size, interp, use_scale, max_hw);
+  return r(mat, lib);
+}
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.h b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.h
new file mode 100755
index 0000000000..b4d0f14251
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short.h
@@ -0,0 +1,74 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "ultrainfer/vision/common/processors/base.h"
+#ifdef ENABLE_CVCUDA
+#include <cvcuda/OpResize.hpp>
+
+#include "ultrainfer/vision/common/processors/cvcuda_utils.h"
+#endif
+
+namespace ultrainfer {
+namespace vision {
+
+/*! @brief Processor for resizing images by their short edge.
+ */
+class ULTRAINFER_DECL ResizeByShort : public Processor {
+public:
+  ResizeByShort(int target_size, int interp = 1, bool use_scale = true,
+                const std::vector<int> &max_hw = std::vector<int>()) {
+    target_size_ = target_size;
+    max_hw_ = max_hw;
+    interp_ = interp;
+    use_scale_ = use_scale;
+  }
+  bool ImplByOpenCV(FDMat *mat);
+#ifdef ENABLE_FLYCV
+  bool ImplByFlyCV(FDMat *mat);
+#endif
+#ifdef ENABLE_CVCUDA
+  bool ImplByCvCuda(FDMat *mat);
+  bool ImplByCvCuda(FDMatBatch *mat_batch);
+#endif
+  std::string Name() { return "ResizeByShort"; }
+
+  /** \brief Process the input images
+   *
+   * \param[in] mat The input image data.
+   * \param[in] target_size target size of the output image.
+   * \param[in] interp interpolation method, default is 1.
+   * \param[in] use_scale to define whether to scale the image, default is
+   * true. \param[in] max_hw max HW of output image. \param[in] lib to define
+   * OpenCV or FlyCV or CVCUDA will be used.
\return true if the process + * successed, otherwise false + */ + static bool Run(FDMat *mat, int target_size, int interp = 1, + bool use_scale = true, + const std::vector &max_hw = std::vector(), + ProcLib lib = ProcLib::DEFAULT); + +private: + double GenerateScale(const int origin_w, const int origin_h); + int target_size_; + std::vector max_hw_; + int interp_; + bool use_scale_; +#ifdef ENABLE_CVCUDA + cvcuda::Resize cvcuda_resize_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short_pybind.cc new file mode 100755 index 0000000000..79ad41037d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_by_short_pybind.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindResizeByShort(pybind11::module &m) { + pybind11::class_(m, "ResizeByShort") + .def(pybind11::init>(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/resize_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_pybind.cc new file mode 100755 index 0000000000..0f8a1f4ceb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/resize_pybind.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindResize(pybind11::module &m) { + pybind11::class_(m, "Resize") + .def(pybind11::init(), + "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.cc new file mode 100755 index 0000000000..9d0f292e1c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.cc @@ -0,0 +1,186 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/stride_pad.h" + +namespace ultrainfer { +namespace vision { + +bool StridePad::ImplByOpenCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "StridePad: The input data must be Layout::HWC format!" + << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "StridePad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR + << "StridePad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() << ", the size of padding values = " << value_.size() + << "." << std::endl; + return false; + } + int origin_w = mat->Width(); + int origin_h = mat->Height(); + + int pad_h = (mat->Height() / stride_) * stride_ + + (mat->Height() % stride_ != 0) * stride_ - mat->Height(); + int pad_w = (mat->Width() / stride_) * stride_ + + (mat->Width() % stride_ != 0) * stride_ - mat->Width(); + if (pad_h == 0 && pad_w == 0) { + return true; + } + cv::Mat *im = mat->GetOpenCVMat(); + cv::Scalar value; + if (value_.size() == 1) { + value = cv::Scalar(value_[0]); + } else if (value_.size() == 2) { + value = cv::Scalar(value_[0], value_[1]); + } else if (value_.size() == 3) { + value = cv::Scalar(value_[0], value_[1], value_[2]); + } else { + value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]); + } + // top, bottom, left, right + cv::copyMakeBorder(*im, *im, 0, pad_h, 0, pad_w, cv::BORDER_CONSTANT, value); + mat->SetHeight(origin_h + pad_h); + mat->SetWidth(origin_w + pad_w); + return true; +} + +#ifdef ENABLE_FLYCV +bool StridePad::ImplByFlyCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "StridePad: The input data must be Layout::HWC format!" + << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "StridePad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR + << "StridePad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() << ", the size of padding values = " << value_.size() + << "." 
<< std::endl; + return false; + } + int origin_w = mat->Width(); + int origin_h = mat->Height(); + + int pad_h = (mat->Height() / stride_) * stride_ + + (mat->Height() % stride_ != 0) * stride_ - mat->Height(); + int pad_w = (mat->Width() / stride_) * stride_ + + (mat->Width() % stride_ != 0) * stride_ - mat->Width(); + if (pad_h == 0 && pad_w == 0) { + return true; + } + fcv::Mat *im = mat->GetFlyCVMat(); + fcv::Scalar value; + if (value_.size() == 1) { + value = fcv::Scalar(value_[0]); + } else if (value_.size() == 2) { + value = fcv::Scalar(value_[0], value_[1]); + } else if (value_.size() == 3) { + value = fcv::Scalar(value_[0], value_[1], value_[2]); + } else { + value = fcv::Scalar(value_[0], value_[1], value_[2], value_[3]); + } + fcv::Mat new_im; + // top, bottom, left, right + fcv::copy_make_border(*im, new_im, 0, pad_h, 0, pad_w, + fcv::BorderType::BORDER_CONSTANT, value); + mat->SetMat(new_im); + mat->SetHeight(new_im.height()); + mat->SetWidth(new_im.width()); + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool StridePad::ImplByCvCuda(FDMat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "StridePad: The input data must be Layout::HWC format!" + << std::endl; + return false; + } + if (mat->Channels() > 4) { + FDERROR << "StridePad: Only support channels <= 4." << std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR + << "StridePad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() << ", the size of padding values = " << value_.size() + << "." << std::endl; + return false; + } + int origin_w = mat->Width(); + int origin_h = mat->Height(); + + int pad_h = (mat->Height() / stride_) * stride_ + + (mat->Height() % stride_ != 0) * stride_ - mat->Height(); + int pad_w = (mat->Width() / stride_) * stride_ + + (mat->Width() % stride_ != 0) * stride_ - mat->Width(); + if (pad_h == 0 && pad_w == 0) { + return true; + } + + float4 value; + if (value_.size() == 1) { + value = make_float4(value_[0], 0.0f, 0.0f, 0.0f); + } else if (value_.size() == 2) { + value = make_float4(value_[0], value_[1], 0.0f, 0.0f); + } else if (value_.size() == 3) { + value = make_float4(value_[0], value_[1], value_[2], 0.0f); + } else { + value = make_float4(value_[0], value_[1], value_[2], value_[3]); + } + + // Prepare input tensor + FDTensor *src = CreateCachedGpuInputTensor(mat); + auto src_tensor = CreateCvCudaTensorWrapData(*src); + + int height = mat->Height() + pad_h; + int width = mat->Width() + pad_w; + + // Prepare output tensor + mat->output_cache->Resize({height, width, mat->Channels()}, mat->Type(), + "output_cache", Device::GPU); + auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); + + cvcuda_pad_op_(mat->Stream(), *src_tensor, *dst_tensor, 0, 0, + NVCV_BORDER_CONSTANT, value); + + mat->SetTensor(mat->output_cache); + mat->mat_type = ProcLib::CVCUDA; + return true; +} +#endif + +bool StridePad::Run(Mat *mat, int stride, const std::vector &value, + ProcLib lib) { + auto p = StridePad(stride, value); + return p(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.h b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.h new file mode 100755 index 0000000000..5e873c4b4f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad.h @@ -0,0 +1,65 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" +#ifdef ENABLE_CVCUDA +#include + +#include "ultrainfer/vision/common/processors/cvcuda_utils.h" +#endif + +namespace ultrainfer { +namespace vision { + +/*! @brief Processor for padding images with stride. + */ +class ULTRAINFER_DECL StridePad : public Processor { +public: + // only support pad with left-top padding mode + StridePad(int stride, const std::vector &value) { + stride_ = stride; + value_ = value; + } + bool ImplByOpenCV(Mat *mat); +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(Mat *mat); +#endif +#ifdef ENABLE_CVCUDA + bool ImplByCvCuda(FDMat *mat); +#endif + std::string Name() { return "StridePad"; } + + /** \brief Process the input images + * + * \param[in] mat The input image data, `result = mat * alpha + beta` + * \param[in] stride stride of the padding. + * \param[in] value value vector used by padding of the output image. + * \param[in] lib to define OpenCV or FlyCV or CVCUDA will be used. + * \return true if the process successed, otherwise false + */ + static bool Run(Mat *mat, int stride, + const std::vector &value = std::vector(), + ProcLib lib = ProcLib::DEFAULT); + +private: + int stride_ = 32; + std::vector value_; +#ifdef ENABLE_CVCUDA + cvcuda::CopyMakeBorder cvcuda_pad_op_; +#endif +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad_pybind.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad_pybind.cc new file mode 100755 index 0000000000..ddc579bdba --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/stride_pad_pybind.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindStridePad(pybind11::module &m) { + pybind11::class_(m, "StridePad") + .def(pybind11::init>(), "Default constructor"); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/transform.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/transform.cc new file mode 100755 index 0000000000..7bc2818945 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/transform.cc @@ -0,0 +1,169 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { + +void FuseNormalizeCast(std::vector> *processors) { + // Fuse Normalize and Cast + int cast_index = -1; + for (size_t i = 0; i < processors->size(); ++i) { + if ((*processors)[i]->Name() == "Cast") { + if (i == 0) { + continue; + } + if ((*processors)[i - 1]->Name() != "Normalize" && + (*processors)[i - 1]->Name() != "NormalizeAndPermute") { + continue; + } + cast_index = i; + } + } + if (cast_index < 0) { + return; + } + + if (dynamic_cast((*processors)[cast_index].get())->GetDtype() != + "float") { + return; + } + processors->erase(processors->begin() + cast_index); + FDINFO << (*processors)[cast_index - 1]->Name() << " and Cast are fused to " + << (*processors)[cast_index - 1]->Name() + << " in preprocessing pipeline." << std::endl; +} + +void FuseNormalizeHWC2CHW(std::vector> *processors) { + // Fuse Normalize and HWC2CHW to NormalizeAndPermute + int hwc2chw_index = -1; + for (size_t i = 0; i < processors->size(); ++i) { + if ((*processors)[i]->Name() == "HWC2CHW") { + if (i == 0) { + continue; + } + if ((*processors)[i - 1]->Name() != "Normalize") { + continue; + } + hwc2chw_index = i; + } + } + + if (hwc2chw_index < 0) { + return; + } + + // Get alpha and beta of Normalize + std::vector alpha = + dynamic_cast((*processors)[hwc2chw_index - 1].get()) + ->GetAlpha(); + std::vector beta = + dynamic_cast((*processors)[hwc2chw_index - 1].get()) + ->GetBeta(); + + // Delete Normalize and HWC2CHW + processors->erase(processors->begin() + hwc2chw_index); + processors->erase(processors->begin() + hwc2chw_index - 1); + + // Add NormalizeAndPermute + std::vector mean({0.0, 0.0, 0.0}); + std::vector std({1.0, 1.0, 1.0}); + processors->push_back(std::make_shared(mean, std)); + + // Set alpha and beta + auto processor = dynamic_cast( + (*processors)[hwc2chw_index - 1].get()); + + processor->SetAlpha(alpha); + processor->SetBeta(beta); + FDINFO << "Normalize and HWC2CHW are fused to NormalizeAndPermute " + " in preprocessing pipeline." 
+ << std::endl; +} + +void FuseNormalizeColorConvert( + std::vector> *processors) { + // Fuse Normalize and BGR2RGB/RGB2BGR + int normalize_index = -1; + int color_convert_index = -1; + // If these middle processors are after BGR2RGB/RGB2BGR and before Normalize, + // we can still fuse Normalize and BGR2RGB/RGB2BGR + static std::unordered_set middle_processors( + {"Resize", "ResizeByShort", "ResizeByLong", "Crop", "CenterCrop", + "LimitByStride", "LimitShort", "Pad", "PadToSize", "StridePad", + "WarpAffine"}); + + for (size_t i = 0; i < processors->size(); ++i) { + if ((*processors)[i]->Name() == "BGR2RGB" || + (*processors)[i]->Name() == "RGB2BGR") { + color_convert_index = i; + for (size_t j = color_convert_index + 1; j < processors->size(); ++j) { + if ((*processors)[j]->Name() == "Normalize" || + (*processors)[j]->Name() == "NormalizeAndPermute") { + normalize_index = j; + break; + } + } + if (normalize_index < 0) { + return; + } + for (size_t j = color_convert_index + 1; j < normalize_index; ++j) { + if (middle_processors.count((*processors)[j]->Name())) { + continue; + } + return; + } + } + } + + if (color_convert_index < 0) { + return; + } + + // Delete Color Space Convert + std::string color_processor_name = (*processors)[color_convert_index]->Name(); + processors->erase(processors->begin() + color_convert_index); + + // Toggle the swap_rb option of the Normalize processor + std::string normalize_processor_name = + (*processors)[normalize_index - 1]->Name(); + bool swap_rb; + if (normalize_processor_name == "Normalize") { + auto processor = + dynamic_cast((*processors)[normalize_index - 1].get()); + swap_rb = processor->GetSwapRB(); + processor->SetSwapRB(!swap_rb); + } else if (normalize_processor_name == "NormalizeAndPermute") { + auto processor = dynamic_cast( + (*processors)[normalize_index - 1].get()); + swap_rb = processor->GetSwapRB(); + processor->SetSwapRB(!swap_rb); + } else { + FDASSERT(false, "Something wrong in FuseNormalizeColorConvert()."); + } + + FDINFO << color_processor_name << " and " << normalize_processor_name + << " are fused to " << normalize_processor_name + << " with swap_rb=" << !swap_rb << std::endl; +} + +void FuseTransforms(std::vector> *processors) { + FuseNormalizeCast(processors); + FuseNormalizeHWC2CHW(processors); + FuseNormalizeColorConvert(processors); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/transform.h b/libs/ultrainfer/ultrainfer/vision/common/processors/transform.h new file mode 100755 index 0000000000..dc3da75361 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/transform.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
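A minimal usage sketch of the fusion passes implemented above (illustrative only, not part of the patch; the Normalize, HWC2CHW, and BGR2RGB constructors are assumed from the rest of this library):

#include <memory>
#include <vector>

#include "ultrainfer/vision/common/processors/transform.h"

// Build a typical preprocessing pipeline and let FuseTransforms rewrite it
// in place.
std::vector<std::shared_ptr<ultrainfer::vision::Processor>>
BuildFusedPipeline() {
  using namespace ultrainfer::vision;
  std::vector<std::shared_ptr<Processor>> ops;
  ops.push_back(std::make_shared<BGR2RGB>());
  ops.push_back(std::make_shared<Resize>(224, 224));
  ops.push_back(std::make_shared<Normalize>(
      std::vector<float>{0.485f, 0.456f, 0.406f},
      std::vector<float>{0.229f, 0.224f, 0.225f}));
  ops.push_back(std::make_shared<HWC2CHW>());
  // Expected result: Normalize + HWC2CHW collapse into NormalizeAndPermute,
  // and BGR2RGB is folded into it by toggling swap_rb, so the pipeline ends
  // up as Resize + NormalizeAndPermute.
  FuseTransforms(&ops);
  return ops;
}

The motivation is to avoid a separate pass over the image for operations that can be applied in a single step.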
+ +#pragma once + +#include "ultrainfer/vision/common/processors/cast.h" +#include "ultrainfer/vision/common/processors/center_crop.h" +#include "ultrainfer/vision/common/processors/color_space_convert.h" +#include "ultrainfer/vision/common/processors/convert.h" +#include "ultrainfer/vision/common/processors/convert_and_permute.h" +#include "ultrainfer/vision/common/processors/crop.h" +#include "ultrainfer/vision/common/processors/hwc2chw.h" +#include "ultrainfer/vision/common/processors/limit_by_stride.h" +#include "ultrainfer/vision/common/processors/limit_short.h" +#include "ultrainfer/vision/common/processors/normalize.h" +#include "ultrainfer/vision/common/processors/normalize_and_permute.h" +#include "ultrainfer/vision/common/processors/pad.h" +#include "ultrainfer/vision/common/processors/pad_to_size.h" +#include "ultrainfer/vision/common/processors/resize.h" +#include "ultrainfer/vision/common/processors/resize_by_short.h" +#include "ultrainfer/vision/common/processors/stride_pad.h" +#include "ultrainfer/vision/common/processors/warp_affine.h" +#include + +namespace ultrainfer { +namespace vision { + +void FuseTransforms(std::vector> *processors); +// Fuse Normalize + Cast(Float) to Normalize +void FuseNormalizeCast(std::vector> *processors); +// Fuse Normalize + HWC2CHW to NormalizeAndPermute +void FuseNormalizeHWC2CHW(std::vector> *processors); +// Fuse Normalize + Color Convert +void FuseNormalizeColorConvert( + std::vector> *processors); + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/utils.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/utils.cc new file mode 100755 index 0000000000..0ca94fd063 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/utils.cc @@ -0,0 +1,279 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
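A complementary sketch for the Normalize + Cast fusion declared in this header (hypothetical; the Cast constructor taking a dtype string, and the Normalize constructor, are assumptions based on how they are used in transform.cc above):

#include <memory>
#include <vector>

#include "ultrainfer/vision/common/processors/transform.h"

// Normalize already produces float output, so a trailing Cast("float") is
// redundant; FuseNormalizeCast is expected to drop it.
void FuseCastExample(
    std::vector<std::shared_ptr<ultrainfer::vision::Processor>> *ops) {
  using namespace ultrainfer::vision;
  ops->push_back(std::make_shared<Normalize>(
      std::vector<float>{0.5f, 0.5f, 0.5f},
      std::vector<float>{0.5f, 0.5f, 0.5f}));
  ops->push_back(std::make_shared<Cast>("float"));
  FuseNormalizeCast(ops);  // erases the Cast, keeps Normalize
}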
+ +#include "ultrainfer/utils/utils.h" + +#include "ultrainfer/vision/common/processors/utils.h" + +namespace ultrainfer { +namespace vision { + +FDDataType OpenCVDataTypeToFD(int type) { + type = type % 8; + if (type == 0) { + return FDDataType::UINT8; + } else if (type == 1) { + return FDDataType::INT8; + } else if (type == 2) { + FDASSERT(false, + "While calling OpenCVDataTypeToFD(), get UINT16 type which is not " + "supported now."); + } else if (type == 3) { + return FDDataType::INT16; + } else if (type == 4) { + return FDDataType::INT32; + } else if (type == 5) { + return FDDataType::FP32; + } else if (type == 6) { + return FDDataType::FP64; + } else { + FDASSERT(false, + "While calling OpenCVDataTypeToFD(), get type = %d, which is not " + "expected.", + type); + } +} + +int CreateOpenCVDataType(FDDataType type, int channel) { + FDASSERT(channel == 1 || channel == 3 || channel == 4, + "Only support channel be 1/3/4 in OpenCV."); + if (type == FDDataType::UINT8) { + if (channel == 1) { + return CV_8UC1; + } else if (channel == 3) { + return CV_8UC3; + } else { + return CV_8UC4; + } + } else if (type == FDDataType::FP32) { + if (channel == 1) { + return CV_32FC1; + } else if (channel == 3) { + return CV_32FC3; + } else { + return CV_32FC4; + } + } + FDASSERT(false, "Data type of %s is not supported.", Str(type).c_str()); + return CV_32FC3; +} + +#ifdef ENABLE_FLYCV +FDDataType FlyCVDataTypeToFD(fcv::FCVImageType type) { + if (type == fcv::FCVImageType::GRAY_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_BGR_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_RGB_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_BGR_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_RGB_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PLA_BGR_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PLA_RGB_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PLA_BGRA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PLA_RGBA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PLA_BGR_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PLA_RGB_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PLA_BGRA_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PLA_RGBA_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_BGRA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_RGBA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_BGRA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_RGBA_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_BGR565_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::PKG_RGB565_U8) { + return FDDataType::UINT8; + } else if (type == fcv::FCVImageType::GRAY_S32) { + return FDDataType::INT32; + } else if (type == fcv::FCVImageType::GRAY_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_BGR_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_RGB_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_BGR_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_RGB_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_BGRA_F32) { + 
return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_RGBA_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_BGRA_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::PKG_RGBA_F32) { + return FDDataType::FP32; + } else if (type == fcv::FCVImageType::GRAY_F64) { + return FDDataType::FP64; + } + FDASSERT(false, "While calling FlyCVDataTypeToFD(), get unexpected type:%d.", + int(type)); + return FDDataType::UNKNOWN1; +} + +fcv::FCVImageType CreateFlyCVDataType(FDDataType type, int channel) { + FDASSERT(channel == 1 || channel == 3 || channel == 4, + "Only support channel be 1/3/4 in FlyCV."); + if (type == FDDataType::UINT8) { + if (channel == 1) { + return fcv::FCVImageType::GRAY_U8; + } else if (channel == 3) { + return fcv::FCVImageType::PKG_BGR_U8; + } else { + return fcv::FCVImageType::PKG_BGRA_U8; + } + } else if (type == FDDataType::FP32) { + if (channel == 1) { + return fcv::FCVImageType::GRAY_F32; + } else if (channel == 3) { + return fcv::FCVImageType::PKG_BGR_F32; + } else { + return fcv::FCVImageType::PKG_BGRA_F32; + } + } + FDASSERT(false, "Data type of %s is not supported.", Str(type).c_str()); + return fcv::FCVImageType::PKG_BGR_F32; +} + +fcv::Mat ConvertOpenCVMatToFlyCV(cv::Mat &im) { + int type = im.type() % 8; + // 0: uint8; 5: float32; 6: float64 + if (type != 0 && type != 5 && type != 6) { + FDASSERT(false, "Only support type of uint8/float/double, but now it's %d.", + im.type()); + } + auto fcv_type = + CreateFlyCVDataType(OpenCVDataTypeToFD(im.type()), im.channels()); + return fcv::Mat(im.cols, im.rows, fcv_type, im.ptr()); // reference only +} + +cv::Mat ConvertFlyCVMatToOpenCV(fcv::Mat &fim) { + auto fd_dtype = FlyCVDataTypeToFD(fim.type()); + if (fd_dtype != FDDataType::UINT8 && fd_dtype != FDDataType::FP32 && + fd_dtype != FDDataType::FP64) { + FDASSERT(false, "Only support type of uint8/float/double, but now it's %s.", + Str(fd_dtype).c_str()); + } + auto ocv_type = CreateOpenCVDataType(fd_dtype, fim.channels()); + return cv::Mat(fim.height(), fim.width(), ocv_type, + fim.data()); // reference only +} +#endif + +cv::Mat CreateZeroCopyOpenCVMatFromBuffer(int height, int width, int channels, + FDDataType type, void *data) { + cv::Mat ocv_mat; + switch (type) { + case FDDataType::UINT8: + ocv_mat = cv::Mat(height, width, CV_8UC(channels), data); + break; + case FDDataType::INT8: + ocv_mat = cv::Mat(height, width, CV_8SC(channels), data); + break; + case FDDataType::INT16: + ocv_mat = cv::Mat(height, width, CV_16SC(channels), data); + break; + case FDDataType::INT32: + ocv_mat = cv::Mat(height, width, CV_32SC(channels), data); + break; + case FDDataType::FP32: + ocv_mat = cv::Mat(height, width, CV_32FC(channels), data); + break; + case FDDataType::FP64: + ocv_mat = cv::Mat(height, width, CV_64FC(channels), data); + break; + default: + FDASSERT(false, + "Tensor type %d is not supported While calling " + "CreateZeroCopyOpenCVMat.", + type); + break; + } + return ocv_mat; +} + +cv::Mat CreateZeroCopyOpenCVMatFromTensor(const FDTensor &tensor, + Layout layout) { + FDASSERT(tensor.shape.size() == 3, "When create OepnCV Mat from tensor," + "tensor shape should be 3-Dim"); + FDDataType type = tensor.dtype; + int height = static_cast(tensor.shape[0]); + int width = static_cast(tensor.shape[1]); + int channels = static_cast(tensor.shape[2]); + if (layout == Layout::CHW) { + channels = static_cast(tensor.shape[0]); + height = static_cast(tensor.shape[1]); + width = static_cast(tensor.shape[2]); + } + return 
CreateZeroCopyOpenCVMatFromBuffer( + height, width, channels, type, const_cast(tensor.CpuData())); +} + +#ifdef ENABLE_FLYCV +fcv::Mat CreateZeroCopyFlyCVMatFromBuffer(int height, int width, int channels, + FDDataType type, void *data) { + fcv::Mat fcv_mat; + auto fcv_type = CreateFlyCVDataType(type, channels); + switch (type) { + case FDDataType::UINT8: + fcv_mat = fcv::Mat(width, height, fcv_type, data); + break; + case FDDataType::FP32: + fcv_mat = fcv::Mat(width, height, fcv_type, data); + break; + case FDDataType::FP64: + fcv_mat = fcv::Mat(width, height, fcv_type, data); + break; + default: + FDASSERT(false, + "Tensor type %d is not supported While calling " + "CreateZeroCopyFlyCVMat.", + type); + break; + } + return fcv_mat; +} + +fcv::Mat CreateZeroCopyFlyCVMatFromTensor(const FDTensor &tensor) { + // TODO(qiuyanjun): Should add a Layout checking. Now, we + // assume that the input tensor is already in Layout::HWC. + FDASSERT(tensor.shape.size() == 3, + "When create FlyCV Mat from tensor," + "tensor shape should be 3-Dim, HWC layout"); + FDDataType type = tensor.dtype; + int height = static_cast(tensor.shape[0]); + int width = static_cast(tensor.shape[1]); + int channels = static_cast(tensor.shape[2]); + return CreateZeroCopyFlyCVMatFromBuffer(height, width, channels, type, + const_cast(tensor.Data())); +} +#endif + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/utils.h b/libs/ultrainfer/ultrainfer/vision/common/processors/utils.h new file mode 100755 index 0000000000..0bf48e6d52 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/utils.h @@ -0,0 +1,55 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
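A short sketch of the zero-copy helpers defined above (illustrative; FDTensor::Resize is assumed to allocate a CPU buffer when called with default device arguments, as it is used for the output caches elsewhere in this patch):

#include "opencv2/core/core.hpp"
#include "ultrainfer/core/fd_tensor.h"
#include "ultrainfer/vision/common/processors/utils.h"

// Wrap an HWC float tensor as a cv::Mat without copying. The Mat only
// references the tensor's buffer, so the tensor must outlive the Mat.
cv::Mat WrapTensorAsMat(ultrainfer::FDTensor &tensor) {
  tensor.Resize({224, 224, 3}, ultrainfer::FDDataType::FP32, "demo_input");
  return ultrainfer::vision::CreateZeroCopyOpenCVMatFromTensor(
      tensor, ultrainfer::vision::Layout::HWC);
}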
+ +#pragma once + +#include "opencv2/core/core.hpp" +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/processors/mat.h" + +#ifdef ENABLE_FLYCV +#include "flycv.h" // NOLINT +#endif + +namespace ultrainfer { +namespace vision { + +// Convert data type of opencv to FDDataType +FDDataType OpenCVDataTypeToFD(int type); +// Create data type of opencv by FDDataType +int CreateOpenCVDataType(FDDataType type, int channel = 1); +#ifdef ENABLE_FLYCV +// Convert data type of flycv to FDDataType +FDDataType FlyCVDataTypeToFD(fcv::FCVImageType type); +// Create data type of flycv by FDDataType +fcv::FCVImageType CreateFlyCVDataType(FDDataType type, int channel = 1); +// Convert cv::Mat to fcv::Mat +fcv::Mat ConvertOpenCVMatToFlyCV(cv::Mat &im); +// Convert fcv::Mat to fcv::mat +cv::Mat ConvertFlyCVMatToOpenCV(fcv::Mat &fim); +#endif + +// Create zero copy OpenCV/FlyCV Mat from FD Tensor / Buffer +cv::Mat CreateZeroCopyOpenCVMatFromBuffer(int height, int width, int channels, + FDDataType type, void *data); +cv::Mat CreateZeroCopyOpenCVMatFromTensor(const FDTensor &tensor, + Layout layout = Layout::HWC); +#ifdef ENABLE_FLYCV +fcv::Mat CreateZeroCopyFlyCVMatFromBuffer(int height, int width, int channels, + FDDataType type, void *data); +fcv::Mat CreateZeroCopyFlyCVMatFromTensor(const FDTensor &tensor); +#endif +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.cc b/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.cc new file mode 100755 index 0000000000..e1707b05e4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/common/processors/warp_affine.h" + +namespace ultrainfer { +namespace vision { + +bool WarpAffine::ImplByOpenCV(Mat *mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "WarpAffine: The format of input is not HWC." << std::endl; + return false; + } + cv::Mat *im = mat->GetOpenCVMat(); + if (width_ > 0 && height_ > 0) { + cv::warpAffine(*im, *im, trans_matrix_, cv::Size(width_, height_), interp_, + border_mode_, borderValue_); + } else { + FDERROR + << "WarpAffine: the parameters must satisfy (width > 0 && height > 0) ." 
+ << std::endl; + return false; + } + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + + return true; +} + +bool WarpAffine::Run(Mat *mat, const cv::Mat &trans_matrix, int width, + int height, int interp, int border_mode, + const cv::Scalar &borderValue, ProcLib lib) { + auto r = + WarpAffine(trans_matrix, width, height, interp, border_mode, borderValue); + return r(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.h b/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.h new file mode 100755 index 0000000000..9994acb6c0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/processors/warp_affine.h @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +class WarpAffine : public Processor { +public: + WarpAffine(const cv::Mat &trans_matrix, int width, int height, int interp = 1, + int border_mode = 0, + const cv::Scalar &borderValue = cv::Scalar()) { + trans_matrix_ = trans_matrix; + width_ = width; + height_ = height; + interp_ = interp; + border_mode_ = border_mode; + borderValue_ = borderValue; + } + + bool ImplByOpenCV(Mat *mat); + std::string Name() { return "WarpAffine"; } + + bool SetTransformMatrix(const cv::Mat &trans_matrix) { + trans_matrix_ = trans_matrix; + return true; + } + + std::tuple GetWidthAndHeight() { + return std::make_tuple(width_, height_); + } + + static bool Run(Mat *mat, const cv::Mat &trans_matrix, int width, int height, + int interp = 1, int border_mode = 0, + const cv::Scalar &borderValue = cv::Scalar(), + ProcLib lib = ProcLib::DEFAULT); + +private: + cv::Mat trans_matrix_; + int width_; + int height_; + int interp_; + int border_mode_; + cv::Scalar borderValue_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/result.cc b/libs/ultrainfer/ultrainfer/vision/common/result.cc new file mode 100755 index 0000000000..461da828d1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/result.cc @@ -0,0 +1,944 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
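A small usage sketch for the WarpAffine processor added above (illustrative; constructing an ultrainfer::vision::Mat directly from a cv::Mat is an assumption based on how processors are driven elsewhere in this library):

#include "opencv2/imgproc.hpp"
#include "ultrainfer/vision/common/processors/warp_affine.h"

// Rotate an image by 30 degrees around its center while keeping the
// original canvas size; pixels outside the source get the default border
// value (constant zero).
bool RotateBy30(cv::Mat &im) {
  cv::Point2f center(im.cols / 2.0f, im.rows / 2.0f);
  cv::Mat trans = cv::getRotationMatrix2D(center, /*angle=*/30.0, /*scale=*/1.0);
  ultrainfer::vision::Mat mat(im);
  return ultrainfer::vision::WarpAffine::Run(&mat, trans, im.cols, im.rows,
                                             /*interp=*/1, /*border_mode=*/0);
}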
+#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +void ClassifyResult::Free() { + std::vector().swap(label_ids); + std::vector().swap(scores); + std::vector().swap(feature); +} + +void ClassifyResult::Clear() { + label_ids.clear(); + scores.clear(); + feature.clear(); +} + +void ClassifyResult::Resize(int size) { + label_ids.resize(size); + scores.resize(size); + // TODO(qiuyanjun): feature not perform resize now. + // may need the code below for future. + // feature.resize(size); +} + +std::string ClassifyResult::Str() { + std::string out; + out = "ClassifyResult(\nlabel_ids: "; + for (size_t i = 0; i < label_ids.size(); ++i) { + out = out + std::to_string(label_ids[i]) + ", "; + } + out += "\nscores: "; + for (size_t i = 0; i < scores.size(); ++i) { + out = out + std::to_string(scores[i]) + ", "; + } + if (!feature.empty()) { + out += "\nfeature: size ("; + out += std::to_string(feature.size()) + "), only show first 100 values.\n"; + for (size_t i = 0; i < feature.size(); ++i) { + // only show first 100 values. + if ((i + 1) <= 100) { + out = out + std::to_string(feature[i]) + ", "; + if ((i + 1) % 10 == 0 && (i + 1) < 100) { + out += "\n"; + } + if ((i + 1) == 100) { + out += "\n......"; + } + } + } + } + out += "\n)"; + return out; +} + +ClassifyResult &ClassifyResult::operator=(ClassifyResult &&other) { + if (&other != this) { + label_ids = std::move(other.label_ids); + scores = std::move(other.scores); + feature = std::move(other.feature); + } + return *this; +} + +void Mask::Reserve(int size) { data.reserve(size); } + +void Mask::Resize(int size) { data.resize(size); } + +void Mask::Free() { + std::vector().swap(data); + std::vector().swap(shape); +} + +void Mask::Clear() { + data.clear(); + shape.clear(); +} + +std::string Mask::Str() { + std::string out = "Mask("; + size_t ndim = shape.size(); + for (size_t i = 0; i < ndim; ++i) { + if (i < ndim - 1) { + out += std::to_string(shape[i]) + ","; + } else { + out += std::to_string(shape[i]); + } + } + out += ")\n"; + return out; +} + +DetectionResult::DetectionResult(const DetectionResult &res) { + boxes.assign(res.boxes.begin(), res.boxes.end()); + rotated_boxes.assign(res.rotated_boxes.begin(), res.rotated_boxes.end()); + scores.assign(res.scores.begin(), res.scores.end()); + label_ids.assign(res.label_ids.begin(), res.label_ids.end()); + contain_masks = res.contain_masks; + if (contain_masks) { + masks.clear(); + size_t mask_size = res.masks.size(); + for (size_t i = 0; i < mask_size; ++i) { + masks.emplace_back(res.masks[i]); + } + } +} + +DetectionResult &DetectionResult::operator=(DetectionResult &&other) { + if (&other != this) { + boxes = std::move(other.boxes); + rotated_boxes = std::move(other.rotated_boxes); + scores = std::move(other.scores); + label_ids = std::move(other.label_ids); + contain_masks = std::move(other.contain_masks); + if (contain_masks) { + masks.clear(); + masks = std::move(other.masks); + } + } + return *this; +} + +void DetectionResult::Free() { + std::vector>().swap(boxes); + std::vector>().swap(rotated_boxes); + std::vector().swap(scores); + std::vector().swap(label_ids); + std::vector().swap(masks); + contain_masks = false; +} + +void DetectionResult::Clear() { + boxes.clear(); + rotated_boxes.clear(); + scores.clear(); + label_ids.clear(); + masks.clear(); + contain_masks = false; +} + +void DetectionResult::Reserve(int size) { + boxes.reserve(size); + rotated_boxes.reserve(size); + scores.reserve(size); + label_ids.reserve(size); + if (contain_masks) { + 
masks.reserve(size); + } +} + +void DetectionResult::Resize(int size) { + boxes.resize(size); + rotated_boxes.resize(size); + scores.resize(size); + label_ids.resize(size); + if (contain_masks) { + masks.resize(size); + } +} + +std::string DetectionResult::Str() { + std::string out; + if (!contain_masks) { + out = "DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]\n"; + if (!rotated_boxes.empty()) { + out = "DetectionResult: [x1, y1, x2, y2, x3, y3, x4, y4, score, " + "label_id]\n"; + } + } else { + out = "DetectionResult: [xmin, ymin, xmax, ymax, score, label_id, " + "mask_shape]\n"; + if (!rotated_boxes.empty()) { + out = + "DetectionResult: [x1, y1, x2, y2, x3, y3, x4, y4, score, label_id, " + "mask_shape]\n"; + } + } + for (size_t i = 0; i < boxes.size(); ++i) { + out = out + std::to_string(boxes[i][0]) + "," + + std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + + ", " + std::to_string(boxes[i][3]) + ", " + + std::to_string(scores[i]) + ", " + std::to_string(label_ids[i]); + if (!contain_masks) { + out += "\n"; + } else { + out += ", " + masks[i].Str(); + } + } + + for (size_t i = 0; i < rotated_boxes.size(); ++i) { + out = out + std::to_string(rotated_boxes[i][0]) + "," + + std::to_string(rotated_boxes[i][1]) + ", " + + std::to_string(rotated_boxes[i][2]) + ", " + + std::to_string(rotated_boxes[i][3]) + ", " + + std::to_string(rotated_boxes[i][4]) + "," + + std::to_string(rotated_boxes[i][5]) + ", " + + std::to_string(rotated_boxes[i][6]) + ", " + + std::to_string(rotated_boxes[i][7]) + ", " + + std::to_string(scores[i]) + ", " + std::to_string(label_ids[i]); + out += "\n"; + } + return out; +} + +// PerceptionResult ----------------------------------------------------- +PerceptionResult::PerceptionResult(const PerceptionResult &res) { + scores.assign(res.scores.begin(), res.scores.end()); + label_ids.assign(res.label_ids.begin(), res.label_ids.end()); + boxes.assign(res.boxes.begin(), res.boxes.end()); + center.assign(res.center.begin(), res.center.end()); + observation_angle.assign(res.observation_angle.begin(), + res.observation_angle.end()); + yaw_angle.assign(res.yaw_angle.begin(), res.yaw_angle.end()); + velocity.assign(res.velocity.begin(), res.velocity.end()); + valid.assign(res.valid.begin(), res.valid.end()); +} + +PerceptionResult &PerceptionResult::operator=(PerceptionResult &&other) { + if (&other != this) { + scores = std::move(other.scores); + label_ids = std::move(other.label_ids); + boxes = std::move(other.boxes); + center = std::move(other.center); + observation_angle = std::move(other.observation_angle); + yaw_angle = std::move(other.yaw_angle); + velocity = std::move(other.velocity); + valid = std::move(other.valid); + } + return *this; +} + +void PerceptionResult::Free() { + std::vector().swap(scores); + std::vector().swap(label_ids); + std::vector>().swap(boxes); + std::vector>().swap(center); + std::vector().swap(observation_angle); + std::vector().swap(yaw_angle); + std::vector>().swap(velocity); + std::vector().swap(valid); +} + +void PerceptionResult::Clear() { + scores.clear(); + label_ids.clear(); + boxes.clear(); + center.clear(); + observation_angle.clear(); + yaw_angle.clear(); + velocity.clear(); + valid.clear(); +} + +void PerceptionResult::Reserve(int size) { + scores.reserve(size); + label_ids.reserve(size); + boxes.reserve(size); + center.reserve(size); + observation_angle.reserve(size); + yaw_angle.reserve(size); + velocity.reserve(size); +} + +void PerceptionResult::Resize(int size) { + scores.resize(size); + 
label_ids.resize(size); + boxes.resize(size); + center.resize(size); + observation_angle.resize(size); + yaw_angle.resize(size); + velocity.resize(size); +} + +std::string PerceptionResult::Str() { + std::string out; + out = "PerceptionResult: ["; + if (valid[2]) { + out += "xmin, ymin, xmax, ymax, w, h, l,"; + } + if (valid[3]) { + out += " cx, cy, cz,"; + } + if (valid[5]) { + out += " yaw_angle,"; + } + if (valid[4]) { + out += " ob_angle,"; + } + if (valid[0]) { + out += " score,"; + } + if (valid[1]) { + out += " label_id,"; + } + out += "]\n"; + + for (size_t i = 0; i < boxes.size(); ++i) { + if (valid[2]) { + out = out + std::to_string(boxes[i][0]) + "," + + std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + + ", " + std::to_string(boxes[i][3]) + ", " + + std::to_string(boxes[i][4]) + ", " + std::to_string(boxes[i][5]) + + ", " + std::to_string(boxes[i][6]) + ", "; + } + if (valid[3]) { + out = out + std::to_string(center[i][0]) + ", " + + std::to_string(center[i][1]) + ", " + std::to_string(center[i][2]) + + ", "; + } + if (valid[5]) { + out = out + std::to_string(yaw_angle[i]) + ", "; + } + if (valid[4]) { + out = out + std::to_string(observation_angle[i]) + ", "; + } + if (valid[0]) { + out = out + std::to_string(scores[i]) + ", "; + } + if (valid[1]) { + out = out + std::to_string(label_ids[i]); + } + out += "\n"; + } + return out; +} + +// PerceptionResult finished + +void KeyPointDetectionResult::Free() { + std::vector>().swap(keypoints); + std::vector().swap(scores); + num_joints = -1; +} + +void KeyPointDetectionResult::Clear() { + keypoints.clear(); + scores.clear(); + num_joints = -1; +} + +void KeyPointDetectionResult::Reserve(int size) { keypoints.reserve(size); } + +void KeyPointDetectionResult::Resize(int size) { keypoints.resize(size); } + +std::string KeyPointDetectionResult::Str() { + std::string out; + + out = "KeyPointDetectionResult: [x, y, conf]\n"; + for (size_t i = 0; i < keypoints.size(); ++i) { + out = out + std::to_string(keypoints[i][0]) + "," + + std::to_string(keypoints[i][1]) + ", " + std::to_string(scores[i]) + + "\n"; + } + out += "num_joints:" + std::to_string(num_joints) + "\n"; + return out; +} + +void OCRResult::Clear() { + boxes.clear(); + text.clear(); + rec_scores.clear(); + cls_scores.clear(); + cls_labels.clear(); +} + +void OCRCURVEResult::Clear() { + boxes.clear(); + text.clear(); + rec_scores.clear(); + cls_scores.clear(); + cls_labels.clear(); +} + +void MOTResult::Clear() { + boxes.clear(); + ids.clear(); + scores.clear(); + class_ids.clear(); +} + +std::string MOTResult::Str() { + std::string out; + out = "MOTResult:\nall boxes counts: " + std::to_string(boxes.size()) + "\n"; + out += "[xmin\tymin\txmax\tymax\tid\tscore]\n"; + for (size_t i = 0; i < boxes.size(); ++i) { + out = out + "[" + std::to_string(boxes[i][0]) + "\t" + + std::to_string(boxes[i][1]) + "\t" + std::to_string(boxes[i][2]) + + "\t" + std::to_string(boxes[i][3]) + "\t" + std::to_string(ids[i]) + + "\t" + std::to_string(scores[i]) + "]\n"; + } + return out; +} + +FaceDetectionResult::FaceDetectionResult(const FaceDetectionResult &res) { + boxes.assign(res.boxes.begin(), res.boxes.end()); + landmarks.assign(res.landmarks.begin(), res.landmarks.end()); + scores.assign(res.scores.begin(), res.scores.end()); + landmarks_per_face = res.landmarks_per_face; +} + +void FaceDetectionResult::Free() { + std::vector>().swap(boxes); + std::vector().swap(scores); + std::vector>().swap(landmarks); + landmarks_per_face = 0; +} + +void FaceDetectionResult::Clear() { + 
boxes.clear(); + scores.clear(); + landmarks.clear(); + landmarks_per_face = 0; +} + +void FaceDetectionResult::Reserve(int size) { + boxes.reserve(size); + scores.reserve(size); + if (landmarks_per_face > 0) { + landmarks.reserve(size * landmarks_per_face); + } +} + +void FaceDetectionResult::Resize(int size) { + boxes.resize(size); + scores.resize(size); + if (landmarks_per_face > 0) { + landmarks.resize(size * landmarks_per_face); + } +} + +std::string FaceDetectionResult::Str() { + std::string out; + // format without landmarks + if (landmarks_per_face <= 0) { + out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score]\n"; + for (size_t i = 0; i < boxes.size(); ++i) { + out = out + std::to_string(boxes[i][0]) + "," + + std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + + ", " + std::to_string(boxes[i][3]) + ", " + + std::to_string(scores[i]) + "\n"; + } + return out; + } + // format with landmarks + FDASSERT((landmarks.size() == boxes.size() * landmarks_per_face), + "The size of landmarks != boxes.size * landmarks_per_face."); + out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x " + + std::to_string(landmarks_per_face) + "]\n"; + for (size_t i = 0; i < boxes.size(); ++i) { + out = out + std::to_string(boxes[i][0]) + "," + + std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + + ", " + std::to_string(boxes[i][3]) + ", " + + std::to_string(scores[i]) + ", "; + for (size_t j = 0; j < landmarks_per_face; ++j) { + out = out + "(" + + std::to_string(landmarks[i * landmarks_per_face + j][0]) + "," + + std::to_string(landmarks[i * landmarks_per_face + j][1]); + if (j < landmarks_per_face - 1) { + out = out + "), "; + } else { + out = out + ")\n"; + } + } + } + return out; +} + +void FaceAlignmentResult::Free() { + std::vector>().swap(landmarks); +} + +void FaceAlignmentResult::Clear() { landmarks.clear(); } + +void FaceAlignmentResult::Reserve(int size) { landmarks.resize(size); } + +void FaceAlignmentResult::Resize(int size) { landmarks.resize(size); } + +std::string FaceAlignmentResult::Str() { + std::string out; + + out = "FaceAlignmentResult: [x, y]\n"; + out = out + "There are " + std::to_string(landmarks.size()) + + " landmarks, the top 10 are listed as below:\n"; + int landmarks_size = landmarks.size(); + size_t result_length = std::min(10, landmarks_size); + for (size_t i = 0; i < result_length; ++i) { + out = out + std::to_string(landmarks[i][0]) + "," + + std::to_string(landmarks[i][1]) + "\n"; + } + out += "num_landmarks:" + std::to_string(landmarks.size()) + "\n"; + return out; +} + +void SegmentationResult::Clear() { + label_map.clear(); + score_map.clear(); + shape.clear(); + contain_score_map = false; +} + +void SegmentationResult::Free() { + std::vector().swap(label_map); + std::vector().swap(score_map); + std::vector().swap(shape); + contain_score_map = false; +} + +void SegmentationResult::Reserve(int size) { + label_map.reserve(size); + if (contain_score_map) { + score_map.reserve(size); + } +} + +void SegmentationResult::Resize(int size) { + label_map.resize(size); + if (contain_score_map) { + score_map.resize(size); + } +} + +std::string SegmentationResult::Str() { + std::string out; + out = "SegmentationResult Image masks 10 rows x 10 cols: \n"; + for (size_t i = 0; i < 10; ++i) { + out += "["; + for (size_t j = 0; j < 10; ++j) { + out = out + std::to_string(label_map[i * 10 + j]) + ", "; + } + out += ".....]\n"; + } + out += "...........\n"; + if (contain_score_map) { + out += "SegmentationResult Score map 10 rows x 10 
cols: \n"; + for (size_t i = 0; i < 10; ++i) { + out += "["; + for (size_t j = 0; j < 10; ++j) { + out = out + std::to_string(score_map[i * 10 + j]) + ", "; + } + out += ".....]\n"; + } + out += "...........\n"; + } + out += "result shape is: [" + std::to_string(shape[0]) + " " + + std::to_string(shape[1]) + "]"; + return out; +} + +SegmentationResult &SegmentationResult::operator=(SegmentationResult &&other) { + if (&other != this) { + label_map = std::move(other.label_map); + shape = std::move(other.shape); + contain_score_map = std::move(other.contain_score_map); + if (contain_score_map) { + score_map.clear(); + score_map = std::move(other.score_map); + } + } + return *this; +} +FaceRecognitionResult::FaceRecognitionResult(const FaceRecognitionResult &res) { + embedding.assign(res.embedding.begin(), res.embedding.end()); +} + +void FaceRecognitionResult::Free() { std::vector().swap(embedding); } + +void FaceRecognitionResult::Clear() { embedding.clear(); } + +void FaceRecognitionResult::Reserve(int size) { embedding.reserve(size); } + +void FaceRecognitionResult::Resize(int size) { embedding.resize(size); } + +std::string FaceRecognitionResult::Str() { + std::string out; + out = "FaceRecognitionResult: ["; + size_t numel = embedding.size(); + if (numel <= 0) { + return out + "Empty Result]"; + } + // max, min, mean + float min_val = embedding.at(0); + float max_val = embedding.at(0); + float total_val = embedding.at(0); + for (size_t i = 1; i < numel; ++i) { + float val = embedding.at(i); + total_val += val; + if (val < min_val) { + min_val = val; + } + if (val > max_val) { + max_val = val; + } + } + float mean_val = total_val / static_cast(numel); + out = out + "Dim(" + std::to_string(numel) + "), " + "Min(" + + std::to_string(min_val) + "), " + "Max(" + std::to_string(max_val) + + "), " + "Mean(" + std::to_string(mean_val) + ")]\n"; + return out; +} + +MattingResult::MattingResult(const MattingResult &res) { + alpha.assign(res.alpha.begin(), res.alpha.end()); + foreground.assign(res.foreground.begin(), res.foreground.end()); + shape.assign(res.shape.begin(), res.shape.end()); + contain_foreground = res.contain_foreground; +} + +void MattingResult::Clear() { + alpha.clear(); + foreground.clear(); + shape.clear(); + contain_foreground = false; +} + +void MattingResult::Free() { + std::vector().swap(alpha); + std::vector().swap(foreground); + std::vector().swap(shape); + contain_foreground = false; +} + +void MattingResult::Reserve(int size) { + alpha.reserve(size); + if (contain_foreground) { + FDASSERT((shape.size() == 3), + "Please initial shape (h,w,c) before call Reserve."); + int c = static_cast(shape[2]); + foreground.reserve(size * c); + } +} + +void MattingResult::Resize(int size) { + alpha.resize(size); + if (contain_foreground) { + FDASSERT((shape.size() == 3), + "Please initial shape (h,w,c) before call Resize."); + int c = static_cast(shape[2]); + foreground.resize(size * c); + } +} + +std::string MattingResult::Str() { + std::string out; + out = "MattingResult["; + if (contain_foreground) { + out += "Foreground(true)"; + } else { + out += "Foreground(false)"; + } + out += ", Alpha("; + size_t numel = alpha.size(); + if (numel <= 0) { + return out + "[Empty Result]"; + } + // max, min, mean + float min_val = alpha.at(0); + float max_val = alpha.at(0); + float total_val = alpha.at(0); + for (size_t i = 1; i < numel; ++i) { + float val = alpha.at(i); + total_val += val; + if (val < min_val) { + min_val = val; + } + if (val > max_val) { + max_val = val; + } + } + float mean_val 
= total_val / static_cast(numel); + // shape + std::string shape_str = "Shape("; + for (size_t i = 0; i < shape.size(); ++i) { + if ((i + 1) != shape.size()) { + shape_str += std::to_string(shape[i]) + ","; + } else { + shape_str += std::to_string(shape[i]) + ")"; + } + } + out = out + "Numel(" + std::to_string(numel) + "), " + shape_str + ", Min(" + + std::to_string(min_val) + "), " + "Max(" + std::to_string(max_val) + + "), " + "Mean(" + std::to_string(mean_val) + "))]\n"; + return out; +} + +std::string OCRResult::Str() { + std::string no_result; + if (boxes.size() > 0) { + std::string out; + for (int n = 0; n < boxes.size(); n++) { + out = out + "det boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(boxes[n][i * 2]) + "," + + std::to_string(boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]"; + + if (rec_scores.size() > 0) { + out = out + "rec text: " + text[n] + + " rec score:" + std::to_string(rec_scores[n]) + " "; + } + if (cls_labels.size() > 0) { + out = out + "cls label: " + std::to_string(cls_labels[n]) + + " cls score: " + std::to_string(cls_scores[n]); + } + out = out + "\n"; + } + + if (table_boxes.size() > 0 && table_structure.size() > 0) { + for (int n = 0; n < boxes.size(); n++) { + out = out + "table boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + + std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]\n"; + } + + out = out + "\ntable structure: \n"; + for (int m = 0; m < table_structure.size(); m++) { + out += table_structure[m]; + } + + if (!table_html.empty()) { + out = out + "\n" + "table html: \n" + table_html; + } + } + std::vector> table_boxes; + std::vector table_structure; + return out; + + } else if (boxes.size() == 0 && rec_scores.size() > 0 && + cls_scores.size() > 0) { + std::string out; + for (int i = 0; i < rec_scores.size(); i++) { + out = out + "rec text: " + text[i] + + " rec score:" + std::to_string(rec_scores[i]) + " "; + out = out + "cls label: " + std::to_string(cls_labels[i]) + + " cls score: " + std::to_string(cls_scores[i]); + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && rec_scores.size() == 0 && + cls_scores.size() > 0) { + std::string out; + for (int i = 0; i < cls_scores.size(); i++) { + out = out + "cls label: " + std::to_string(cls_labels[i]) + + " cls score: " + std::to_string(cls_scores[i]); + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && rec_scores.size() > 0 && + cls_scores.size() == 0) { + std::string out; + for (int i = 0; i < rec_scores.size(); i++) { + out = out + "rec text: " + text[i] + + " rec score:" + std::to_string(rec_scores[i]) + " "; + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && table_boxes.size() > 0 && + table_structure.size() > 0) { + std::string out; + for (int n = 0; n < table_boxes.size(); n++) { + out = out + "table boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + + std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]\n"; + } + + out = out + "\ntable structure: \n"; + for (int m = 0; m < table_structure.size(); m++) { + out += table_structure[m]; + } + + if (!table_html.empty()) { + out = out + "\n" + "table html: \n" + table_html; + } + return out; + } + + no_result = no_result + "No Results!"; + return no_result; +} + +std::string 
OCRCURVEResult::Str() { + std::string no_result; + if (boxes.size() > 0) { + std::string out; + for (int n = 0; n < boxes.size(); n++) { + out = out + "det boxes: ["; + for (int i = 0; i < boxes[n].size() / 2; i++) { + out = out + "[" + std::to_string(boxes[n][i * 2]) + "," + + std::to_string(boxes[n][i * 2 + 1]) + "]"; + + if (i != boxes[n].size() / 2 - 1) { + out = out + ","; + } + } + out = out + "]"; + + if (rec_scores.size() > 0) { + out = out + "rec text: " + text[n] + + " rec score:" + std::to_string(rec_scores[n]) + " "; + } + if (cls_labels.size() > 0) { + out = out + "cls label: " + std::to_string(cls_labels[n]) + + " cls score: " + std::to_string(cls_scores[n]); + } + out = out + "\n"; + } + + if (table_boxes.size() > 0 && table_structure.size() > 0) { + for (int n = 0; n < boxes.size(); n++) { + out = out + "table boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + + std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]\n"; + } + + out = out + "\ntable structure: \n"; + for (int m = 0; m < table_structure.size(); m++) { + out += table_structure[m]; + } + + if (!table_html.empty()) { + out = out + "\n" + "table html: \n" + table_html; + } + } + std::vector> table_boxes; + std::vector table_structure; + return out; + + } else if (boxes.size() == 0 && rec_scores.size() > 0 && + cls_scores.size() > 0) { + std::string out; + for (int i = 0; i < rec_scores.size(); i++) { + out = out + "rec text: " + text[i] + + " rec score:" + std::to_string(rec_scores[i]) + " "; + out = out + "cls label: " + std::to_string(cls_labels[i]) + + " cls score: " + std::to_string(cls_scores[i]); + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && rec_scores.size() == 0 && + cls_scores.size() > 0) { + std::string out; + for (int i = 0; i < cls_scores.size(); i++) { + out = out + "cls label: " + std::to_string(cls_labels[i]) + + " cls score: " + std::to_string(cls_scores[i]); + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && rec_scores.size() > 0 && + cls_scores.size() == 0) { + std::string out; + for (int i = 0; i < rec_scores.size(); i++) { + out = out + "rec text: " + text[i] + + " rec score:" + std::to_string(rec_scores[i]) + " "; + out = out + "\n"; + } + return out; + } else if (boxes.size() == 0 && table_boxes.size() > 0 && + table_structure.size() > 0) { + std::string out; + for (int n = 0; n < table_boxes.size(); n++) { + out = out + "table boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + + std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]\n"; + } + + out = out + "\ntable structure: \n"; + for (int m = 0; m < table_structure.size(); m++) { + out += table_structure[m]; + } + + if (!table_html.empty()) { + out = out + "\n" + "table html: \n" + table_html; + } + return out; + } + + no_result = no_result + "No Results!"; + return no_result; +} +void HeadPoseResult::Free() { std::vector().swap(euler_angles); } + +void HeadPoseResult::Clear() { euler_angles.clear(); } + +void HeadPoseResult::Reserve(int size) { euler_angles.resize(size); } + +void HeadPoseResult::Resize(int size) { euler_angles.resize(size); } + +std::string HeadPoseResult::Str() { + std::string out; + + out = "HeadPoseResult: [yaw, pitch, roll]\n"; + out = out + "yaw: " + std::to_string(euler_angles[0]) + "\n" + + "pitch: " + std::to_string(euler_angles[1]) + "\n" + 
+ "roll: " + std::to_string(euler_angles[2]) + "\n"; + return out; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/common/result.h b/libs/ultrainfer/ultrainfer/vision/common/result.h new file mode 100755 index 0000000000..456b894205 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/common/result.h @@ -0,0 +1,494 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "opencv2/core/core.hpp" +#include "ultrainfer/ultrainfer_model.h" +#include + +namespace ultrainfer { +/** \brief All C++ UltraInfer Vision Models APIs are defined inside this + * namespace + * + */ +namespace vision { +enum ULTRAINFER_DECL ResultType { + UNKNOWN_RESULT, + CLASSIFY, + DETECTION, + SEGMENTATION, + OCR, + MOT, + FACE_DETECTION, + FACE_ALIGNMENT, + FACE_RECOGNITION, + MATTING, + MASK, + KEYPOINT_DETECTION, + HEADPOSE, + PERCEPTION, +}; + +struct ULTRAINFER_DECL BaseResult { + ResultType type = ResultType::UNKNOWN_RESULT; +}; + +/*! @brief Classify result structure for all the image classify models + */ +struct ULTRAINFER_DECL ClassifyResult : public BaseResult { + ClassifyResult() = default; + /// Classify result for an image + std::vector label_ids; + /// The confidence for each classify result + std::vector scores; + /// The feature vector of recognizer, e.g, PP-ShiTuV2 Recognizer + std::vector feature; + ResultType type = ResultType::CLASSIFY; + + /// Resize ClassifyResult data buffer + void Resize(int size); + + /// Clear ClassifyResult + void Clear(); + + /// Clear ClassifyResult and free the memory + void Free(); + + /// Copy constructor + ClassifyResult(const ClassifyResult &other) = default; + /// Move assignment + ClassifyResult &operator=(ClassifyResult &&other); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! Mask structure, used in DetectionResult for instance segmentation models + */ +struct ULTRAINFER_DECL Mask : public BaseResult { + /// Mask data buffer + std::vector data; + /// Shape of mask + std::vector shape; // (H,W) ... + ResultType type = ResultType::MASK; + + /// clear Mask result + void Clear(); + + /// Clear Mask result and free the memory + void Free(); + + /// Return a mutable pointer of the mask data buffer + void *Data() { return data.data(); } + + /// Return a pointer of the mask data buffer for read only + const void *Data() const { return data.data(); } + + /// Reserve size for mask data buffer + void Reserve(int size); + + /// Resize the mask data buffer + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! 
@brief Detection result structure for all the object detection models and + * instance segmentation models + */ +struct ULTRAINFER_DECL DetectionResult : public BaseResult { + DetectionResult() = default; + /** \brief All the detected object boxes for an input image, the size of + * `boxes` is the number of detected objects, and the element of `boxes` is a + * array of 4 float values, means [xmin, ymin, xmax, ymax] + */ + std::vector> boxes; + /** \brief All the detected rotated object boxes for an input image, the size + * of `boxes` is the number of detected objects, and the element of + * `rotated_boxes` is an array of 8 float values, means [x1, y1, x2, y2, x3, + * y3, x4, y4] + */ + std::vector> rotated_boxes; + /** \brief The confidence for all the detected objects + */ + std::vector scores; + /// The classify label for all the detected objects + std::vector label_ids; + /** \brief For instance segmentation model, `masks` is the predict mask for + * all the deteced objects + */ + std::vector masks; + /// Shows if the DetectionResult has mask + bool contain_masks = false; + + ResultType type = ResultType::DETECTION; + + /// Copy constructor + DetectionResult(const DetectionResult &res); + /// Move assignment + DetectionResult &operator=(DetectionResult &&other); + + /// Clear DetectionResult + void Clear(); + + /// Clear DetectionResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Detection result structure for all the object detection models and + * instance segmentation models + */ +struct ULTRAINFER_DECL PerceptionResult : public BaseResult { + PerceptionResult() = default; + + std::vector scores; + + std::vector label_ids; + // xmin, ymin, xmax, ymax, h, w, l + std::vector> boxes; + // cx, cy, cz + std::vector> center; + + std::vector observation_angle; + + std::vector yaw_angle; + // vx, vy, vz + std::vector> velocity; + + // valid results for func Str(): True for printing + // 0 scores + // 1 label_ids + // 2 boxes + // 3 center + // 4 observation_angle + // 5 yaw_angle + // 6 velocity + std::vector valid; + + /// Copy constructor + PerceptionResult(const PerceptionResult &res); + /// Move assignment + PerceptionResult &operator=(PerceptionResult &&other); + + /// Clear PerceptionResult + void Clear(); + + /// Clear PerceptionResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! 
@brief KeyPoint Detection result structure for all the keypoint detection + * models + */ +struct ULTRAINFER_DECL KeyPointDetectionResult : public BaseResult { + /** \brief All the coordinates of detected keypoints for an input image, the + * size of `keypoints` is num_detected_objects * num_joints, and the element + * of `keypoint` is a array of 2 float values, means [x, y] + */ + std::vector> keypoints; + //// The confidence for all the detected points + std::vector scores; + //// Number of joints for a detected object + int num_joints = -1; + + ResultType type = ResultType::KEYPOINT_DETECTION; + /// Clear KeyPointDetectionResult + void Clear(); + + /// Clear KeyPointDetectionResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +struct ULTRAINFER_DECL OCRResult : public BaseResult { + std::vector> boxes; + + std::vector text; + std::vector rec_scores; + + std::vector cls_scores; + std::vector cls_labels; + + std::vector> table_boxes; + std::vector table_structure; + std::string table_html; + + ResultType type = ResultType::OCR; + + void Clear(); + + std::string Str(); +}; + +struct ULTRAINFER_DECL OCRCURVEResult : public BaseResult { + std::vector> boxes; + std::vector text; + std::vector rec_scores; + + std::vector cls_scores; + std::vector cls_labels; + + std::vector> table_boxes; + std::vector table_structure; + std::string table_html; + + ResultType type = ResultType::OCR; + + void Clear(); + + std::string Str(); +}; +/*! @brief MOT(Multi-Object Tracking) result structure for all the MOT models + */ +struct ULTRAINFER_DECL MOTResult : public BaseResult { + /** \brief All the tracking object boxes for an input image, the size of + * `boxes` is the number of tracking objects, and the element of `boxes` is a + * array of 4 float values, means [xmin, ymin, xmax, ymax] + */ + std::vector> boxes; + /** \brief All the tracking object ids + */ + std::vector ids; + /** \brief The confidence for all the tracking objects + */ + std::vector scores; + /** \brief The classify label id for all the tracking object + */ + std::vector class_ids; + + ResultType type = ResultType::MOT; + /// Clear MOT result + void Clear(); + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Face detection result structure for all the face detection models + */ +struct ULTRAINFER_DECL FaceDetectionResult : public BaseResult { + /** \brief All the detected object boxes for an input image, the size of + * `boxes` is the number of detected objects, and the element of `boxes` is a + * array of 4 float values, means [xmin, ymin, xmax, ymax] + */ + std::vector> boxes; + /** \brief + * If the model detect face with landmarks, every detected object box + * correspoing to a landmark, which is a array of 2 float values, means + * location [x,y] + */ + std::vector> landmarks; + /** \brief + * Indicates the confidence of all targets detected from a single image, and + * the number of elements is consistent with boxes.size() + */ + std::vector scores; + ResultType type = ResultType::FACE_DETECTION; + /** \brief + * `landmarks_per_face` indicates the number of face landmarks for each + * detected face if the model's output contains face landmarks (such as + * YOLOv5Face, SCRFD, ...) 
+ */ + int landmarks_per_face; + + FaceDetectionResult() { landmarks_per_face = 0; } + FaceDetectionResult(const FaceDetectionResult &res); + /// Clear FaceDetectionResult + void Clear(); + + /// Clear FaceDetectionResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Face Alignment result structure for all the face alignment models + */ +struct ULTRAINFER_DECL FaceAlignmentResult : public BaseResult { + /** \brief All the coordinates of detected landmarks for an input image, and + * the element of `landmarks` is a array of 2 float values, means [x, y] + */ + std::vector> landmarks; + + ResultType type = ResultType::FACE_ALIGNMENT; + /// Clear FaceAlignmentResult + void Clear(); + + /// Clear FaceAlignmentResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Segmentation result structure for all the segmentation models + */ +struct ULTRAINFER_DECL SegmentationResult : public BaseResult { + SegmentationResult() = default; + /** \brief + * `label_map` stores the pixel-level category labels for input image. the + * number of pixels is equal to label_map.size() + */ + std::vector label_map; + /** \brief + * `score_map` stores the probability of the predicted label for each pixel of + * input image. + */ + std::vector score_map; + /// The output shape, means [H, W] + std::vector shape; + /// SegmentationResult whether containing score_map + bool contain_score_map = false; + + /// Copy constructor + SegmentationResult(const SegmentationResult &other) = default; + /// Move assignment + SegmentationResult &operator=(SegmentationResult &&other); + + ResultType type = ResultType::SEGMENTATION; + /// Clear Segmentation result + void Clear(); + + /// Clear Segmentation result and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Face recognition result structure for all the Face recognition models + */ +struct ULTRAINFER_DECL FaceRecognitionResult : public BaseResult { + /** \brief The feature embedding that represents the final extraction of the + * face recognition model can be used to calculate the feature similarity + * between faces. + */ + std::vector embedding; + + ResultType type = ResultType::FACE_RECOGNITION; + + FaceRecognitionResult() {} + FaceRecognitionResult(const FaceRecognitionResult &res); + /// Clear FaceRecognitionResult + void Clear(); + + /// Clear FaceRecognitionResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief Matting result structure for all the Matting models + */ +struct ULTRAINFER_DECL MattingResult : public BaseResult { + /** \brief + `alpha` is a one-dimensional vector, which is the predicted alpha transparency + value. The range of values is [0., 1.], and the length is hxw. h, w are the + height and width of the input image + */ + std::vector alpha; // h x w + /** \brief + If the model can predict foreground, `foreground` save the predicted + foreground image, the shape is [hight,width,channel] generally. 
+ */ + std::vector foreground; // h x w x c (c=3 default) + /** \brief + * The shape of output result, when contain_foreground == false, shape only + * contains (h, w), when contain_foreground == true, shape contains (h, w, c), + * and c is generally 3 + */ + std::vector shape; + /** \brief + If the model can predict alpha matte and foreground, contain_foreground = + true, default false + */ + bool contain_foreground = false; + + ResultType type = ResultType::MATTING; + + MattingResult() {} + MattingResult(const MattingResult &res); + /// Clear matting result + void Clear(); + + /// Free matting result + void Free(); + + void Reserve(int size); + + void Resize(int size); + /// Debug function, convert the result to string to print + std::string Str(); +}; + +/*! @brief HeadPose result structure for all the headpose models + */ +struct ULTRAINFER_DECL HeadPoseResult : public BaseResult { + /** \brief EulerAngles for an input image, and the element of `euler_angles` + * is a vector, contains {yaw, pitch, roll} + */ + std::vector euler_angles; + + ResultType type = ResultType::HEADPOSE; + /// Clear HeadPoseResult + void Clear(); + + /// Clear HeadPoseResult and free the memory + void Free(); + + void Reserve(int size); + + void Resize(int size); + + /// Debug function, convert the result to string to print + std::string Str(); +}; + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.cc new file mode 100755 index 0000000000..94a6fd0d8b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +FastestDet::FastestDet(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool FastestDet::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool FastestDet::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool FastestDet::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h new file mode 100755 index 0000000000..c63d7e1c59 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief FastestDet model object used when to load a FastestDet model exported + * by FastestDet. + */ +class ULTRAINFER_DECL FastestDet : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./fastestdet.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + FastestDet(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "fastestdet"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of FastestDet + virtual FastestDetPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of FastestDet + virtual FastestDetPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + FastestDetPreprocessor preprocessor_; + FastestDetPostprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet_pybind.cc new file mode 100755 index 0000000000..c7546f00a1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/fastestdet_pybind.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
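+// ---------------------------------------------------------------------------
+// Illustrative usage sketch (not part of the original patch): a minimal C++
+// example of how the FastestDet wrapper declared in fastestdet.h might be
+// driven end to end. The model path "fastestdet.onnx", the image path
+// "test.jpg", and the helpers RuntimeOption::UseCpu() and Initialized() on the
+// base model class are assumptions made for the example only.
+//
+// #include "ultrainfer/vision/detection/contrib/fastestdet/fastestdet.h"
+// #include <opencv2/imgcodecs.hpp>
+// #include <iostream>
+//
+// int main() {
+//   ultrainfer::RuntimeOption option;
+//   option.UseCpu();  // assumed helper that selects the CPU backends
+//   ultrainfer::vision::detection::FastestDet model(
+//       "fastestdet.onnx", "", option, ultrainfer::ModelFormat::ONNX);
+//   if (!model.Initialized()) {
+//     std::cerr << "Failed to initialize FastestDet." << std::endl;
+//     return -1;
+//   }
+//   cv::Mat im = cv::imread("test.jpg");
+//   ultrainfer::vision::DetectionResult res;
+//   if (!model.Predict(im, &res)) {
+//     std::cerr << "Prediction failed." << std::endl;
+//     return -1;
+//   }
+//   std::cout << res.Str() << std::endl;  // boxes are [xmin, ymin, xmax, ymax]
+//   return 0;
+// }
+//
+// For several images, BatchPredict() accepts a std::vector<cv::Mat> and fills
+// one DetectionResult per input.
+// ---------------------------------------------------------------------------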
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindFastestDet(pybind11::module &m) { + pybind11::class_( + m, "FastestDetPreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::FastestDetPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to preprocess the input data in " + "FastestDetPreprocessor.')"); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::detection::FastestDetPreprocessor::GetSize, + &vision::detection::FastestDetPreprocessor::SetSize); + + pybind11::class_( + m, "FastestDetPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::FastestDetPostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to postprocess the runtime result " + "in FastestDetPostprocessor.')"); + } + return results; + }) + .def("run", + [](vision::detection::FastestDetPostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "raise Exception('Failed to postprocess the runtime result " + "in FastestDetPostprocessor.')"); + } + return results; + }) + .def_property( + "conf_threshold", + &vision::detection::FastestDetPostprocessor::GetConfThreshold, + &vision::detection::FastestDetPostprocessor::SetConfThreshold) + .def_property( + "nms_threshold", + &vision::detection::FastestDetPostprocessor::GetNMSThreshold, + &vision::detection::FastestDetPostprocessor::SetNMSThreshold); + + pybind11::class_(m, + "FastestDet") + .def(pybind11::init()) + .def("predict", + [](vision::detection::FastestDet &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::FastestDet &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::FastestDet::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::FastestDet::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.cc new file mode 100755 index 0000000000..ee65d26279 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.cc @@ -0,0 +1,133 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +FastestDetPostprocessor::FastestDetPostprocessor() { + conf_threshold_ = 0.65; + nms_threshold_ = 0.45; +} +float FastestDetPostprocessor::Sigmoid(float x) { + return 1.0f / (1.0f + exp(-x)); +} + +float FastestDetPostprocessor::Tanh(float x) { + return 2.0f / (1.0f + exp(-2 * x)) - 1; +} + +bool FastestDetPostprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = 1; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + + (*results)[bs].Clear(); + // output (1,85,22,22) CHW + const float *output = + reinterpret_cast(tensors[0].Data()) + + bs * tensors[0].shape[1] * tensors[0].shape[2] * tensors[0].shape[3]; + int output_h = tensors[0].shape[2]; // out map height + int output_w = tensors[0].shape[3]; // out map weight + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + + // handle output boxes from out map + for (int h = 0; h < output_h; h++) { + for (int w = 0; w < output_w; w++) { + // object score + int obj_score_index = (h * output_w) + w; + float obj_score = output[obj_score_index]; + + // find max class + int category = 0; + float max_score = 0.0f; + int class_num = tensors[0].shape[1] - 5; + for (size_t i = 0; i < class_num; i++) { + obj_score_index = + ((5 + i) * output_h * output_w) + (h * output_w) + w; + float cls_score = output[obj_score_index]; + if (cls_score > max_score) { + max_score = cls_score; + category = i; + } + } + float score = pow(max_score, 0.4) * pow(obj_score, 0.6); + + // score threshold + if (score <= conf_threshold_) { + continue; + } + if (score > conf_threshold_) { + // handle box x y w h + int x_offset_index = (1 * output_h * output_w) + (h * output_w) + w; + int y_offset_index = (2 * output_h * output_w) + (h * output_w) + w; + int box_width_index = (3 * output_h * output_w) + (h * output_w) + w; + int box_height_index = (4 * output_h * output_w) + (h * output_w) + w; + + float x_offset = Tanh(output[x_offset_index]); + float y_offset = Tanh(output[y_offset_index]); + float box_width = Sigmoid(output[box_width_index]); + float box_height = Sigmoid(output[box_height_index]); + + float cx = (w + x_offset) / output_w; + float cy = (h + y_offset) / output_h; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + cx - box_width / 2.0f, cy - box_height / 2.0f, + cx + box_width / 2.0f, cy + box_height / 2.0f}); + (*results)[bs].label_ids.push_back(category); + (*results)[bs].scores.push_back(score); + } + } + } + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + // scale boxes to origin shape + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + 
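+      // Note added for clarity: at this point the box coordinates are
+      // normalized to roughly [0, 1] (cx/cy are grid positions plus predicted
+      // offsets divided by the output map size, and w/h come from a sigmoid),
+      // so multiplying by the original input width/height maps them back to
+      // pixel coordinates; they are clipped to the image bounds after NMS.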
(*results)[bs].boxes[i][0] = ((*results)[bs].boxes[i][0]) * ipt_w; + (*results)[bs].boxes[i][1] = ((*results)[bs].boxes[i][1]) * ipt_h; + (*results)[bs].boxes[i][2] = ((*results)[bs].boxes[i][2]) * ipt_w; + (*results)[bs].boxes[i][3] = ((*results)[bs].boxes[i][3]) * ipt_h; + } + // NMS + utils::NMS(&((*results)[bs]), nms_threshold_); + // clip box + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + (*results)[bs].boxes[i][0] = std::max((*results)[bs].boxes[i][0], 0.0f); + (*results)[bs].boxes[i][1] = std::max((*results)[bs].boxes[i][1], 0.0f); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w); + (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h); + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h new file mode 100755 index 0000000000..aecd7460b8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/postprocessor.h @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Postprocessor object for FastestDet serials model. 
+ */ +class ULTRAINFER_DECL FastestDetPostprocessor { +public: + /** \brief Create a postprocessor instance for FastestDet serials model + */ + FastestDetPostprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.65 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.65 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.45 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.45 + float GetNMSThreshold() const { return nms_threshold_; } + +protected: + float conf_threshold_; + float nms_threshold_; + float Sigmoid(float x); + float Tanh(float x); +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.cc new file mode 100755 index 0000000000..f8b9a02939 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +FastestDetPreprocessor::FastestDetPreprocessor() { + size_ = {352, 352}; //{h,w} +} + +bool FastestDetPreprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // process after image load + double ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()), + static_cast(mat->Width())); + + // fastestdet's preprocess steps + // 1. resize + // 2. 
convert_and_permute(swap_rb=false) + Resize::Run(mat, size_[0], size_[1]); // resize + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + // convert to float and HWC2CHW + ConvertAndPermute::Run(mat, alpha, beta, false); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool FastestDetPreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h new file mode 100755 index 0000000000..12a41329b2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/fastestdet/preprocessor.h @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Preprocessor object for FastestDet serials model. 
+ */ +class ULTRAINFER_DECL FastestDetPreprocessor { +public: + /** \brief Create a preprocessor instance for FastestDet serials model + */ + FastestDetPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {352, 352} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {352, 352} + std::vector GetSize() const { return size_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + // target size, tuple of (width, height), default size = {352, 352} + std::vector size_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.cc new file mode 100755 index 0000000000..6f0d1c4efa --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.cc @@ -0,0 +1,338 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/nanodet_plus.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace detection { + +struct NanoDetPlusCenterPoint { + int grid0; + int grid1; + int stride; +}; + +void GenerateNanoDetPlusCenterPoints( + const std::vector &size, const std::vector &downsample_strides, + std::vector *center_points) { + // size: tuple of input (width, height), e.g (320, 320) + // downsample_strides: downsample strides in NanoDet and + // NanoDet-Plus, e.g (8, 16, 32, 64) + const int width = size[0]; + const int height = size[1]; + for (const auto &ds : downsample_strides) { + int num_grid_w = width / ds; + int num_grid_h = height / ds; + for (int g1 = 0; g1 < num_grid_h; ++g1) { + for (int g0 = 0; g0 < num_grid_w; ++g0) { + (*center_points).emplace_back(NanoDetPlusCenterPoint{g0, g1, ds}); + } + } + } +} + +void WrapAndResize(Mat *mat, std::vector size, std::vector color, + bool keep_ratio = false) { + // Reference: nanodet/data/transform/warp.py#L139 + // size: tuple of input (width, height) + // The default value of `keep_ratio` is `fasle` in + // `config/nanodet-plus-m-1.5x_320.yml` for both + // train and val processes. So, we just let this + // option default `false` according to the official + // implementation in NanoDet and NanoDet-Plus. 
+ // Note, this function will apply a normal resize + // operation to input Mat if the keep_ratio option + // is fasle and the behavior will be the same as + // yolov5's letterbox if keep_ratio is true. + + // with keep_ratio = false (default) + if (!keep_ratio) { + int resize_h = size[1]; + int resize_w = size[0]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + return; + } + // with keep_ratio = true, same as yolov5's letterbox + float r = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + + int resize_h = int(round(static_cast(mat->Height()) * r)); + int resize_w = int(round(static_cast(mat->Width()) * r)); + + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +void GFLRegression(const float *logits, size_t reg_num, float *offset) { + // Hint: reg_num = reg_max + 1 + FDASSERT(((nullptr != logits) && (reg_num != 0)), + "NanoDetPlus: logits is nullptr or reg_num is 0 in GFLRegression."); + // softmax + float total_exp = 0.f; + std::vector softmax_probs(reg_num); + for (size_t i = 0; i < reg_num; ++i) { + softmax_probs[i] = std::exp(logits[i]); + total_exp += softmax_probs[i]; + } + for (size_t i = 0; i < reg_num; ++i) { + softmax_probs[i] = softmax_probs[i] / total_exp; + } + // gfl regression -> offset + for (size_t i = 0; i < reg_num; ++i) { + (*offset) += static_cast(i) * softmax_probs[i]; + } +} + +NanoDetPlus::NanoDetPlus(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool NanoDetPlus::Initialize() { + // parameters for preprocess + size = {320, 320}; + padding_value = {0.0f, 0.0f, 0.0f}; + keep_ratio = false; + downsample_strides = {8, 16, 32, 64}; + max_wh = 4096.0f; + reg_max = 7; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + return true; +} + +bool NanoDetPlus::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // NanoDet-Plus preprocess steps + // 1. WrapAndResize + // 2. HWC->CHW + // 3. 
Normalize or Convert (keep BGR order) + WrapAndResize(mat, size, padding_value, keep_ratio); + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // Compute `result = mat * alpha + beta` directly by channel + // Reference: /config/nanodet-plus-m-1.5x_320.yml#L89 + // from mean: [103.53, 116.28, 123.675], std: [57.375, 57.12, 58.395] + // x' = (x - mean) / std to x'= x * alpha + beta. + // e.g alpha[0] = 0.017429f = 1.0f / 57.375f + // e.g beta[0] = -103.53f * 0.0174291f + std::vector alpha = {0.017429f, 0.017507f, 0.017125f}; + std::vector beta = {-103.53f * 0.0174291f, -116.28f * 0.0175070f, + -123.675f * 0.0171247f}; // BGR order + Convert::Run(mat, alpha, beta); + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool NanoDetPlus::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + // generate center points with dowmsample strides + std::vector center_points; + GenerateNanoDetPlusCenterPoints(size, downsample_strides, ¢er_points); + + // infer_result shape might look like (1,2125,112) + const int num_cls_reg = infer_result.shape[2]; // e.g 112 + const int num_classes = num_cls_reg - (reg_max + 1) * 4; // e.g 80 + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + float *scores = data + i * num_cls_reg; + float *max_class_score = std::max_element(scores, scores + num_classes); + float confidence = (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(scores, max_class_score); + // fetch i-th center point + float grid0 = static_cast(center_points.at(i).grid0); + float grid1 = static_cast(center_points.at(i).grid1); + float downsample_stride = static_cast(center_points.at(i).stride); + // apply gfl regression to get offsets (l,t,r,b) + float *logits = data + i * num_cls_reg + num_classes; // 32|44... 
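+    // Note added for clarity: GFL decoding. Each of the four box sides
+    // (l, t, r, b) is predicted as a discrete distribution over reg_max + 1
+    // bins; GFLRegression() softmaxes the logits and returns the expected bin
+    // index, i.e. offset = sum_k k * softmax(logits)_k, measured in units of
+    // the downsample stride of this center point.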
+ std::vector offsets(4); + for (size_t j = 0; j < 4; ++j) { + GFLRegression(logits + j * (reg_max + 1), reg_max + 1, &offsets[j]); + } + // convert from offsets to [x1, y1, x2, y2] + float l = offsets[0]; // left + float t = offsets[1]; // top + float r = offsets[2]; // right + float b = offsets[3]; // bottom + + float x1 = (grid0 - l) * downsample_stride; // cx - l x1 + float y1 = (grid1 - t) * downsample_stride; // cy - t y1 + float x2 = (grid0 + r) * downsample_stride; // cx + r x2 + float y2 = (grid1 + b) * downsample_stride; // cy + b y2 + + result->boxes.emplace_back( + std::array{x1 + label_id * max_wh, y1 + label_id * max_wh, + x2 + label_id * max_wh, y2 + label_id * max_wh}); + // label_id * max_wh for multi classes NMS + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + // without keep_ratio + if (!keep_ratio) { + // x' = (x / out_w) * ipt_w = x / (out_w / ipt_w) + // y' = (y / out_h) * ipt_h = y / (out_h / ipt_h) + float r_w = out_w / ipt_w; + float r_h = out_h / ipt_h; + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max(result->boxes[i][0] / r_w, 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1] / r_h, 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2] / r_w, 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3] / r_h, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; + } + // with keep_ratio + float r = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * r) / 2; + float pad_w = (out_w - ipt_w * r) / 2; + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / r, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / r, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / r, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / r, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; 
+} + +bool NanoDetPlus::Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.h new file mode 100755 index 0000000000..745da299a0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus.h @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace detection { +/*! @brief NanoDetPlus model object used when to load a NanoDetPlus model + * exported by NanoDet. + */ +class ULTRAINFER_DECL NanoDetPlus : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./nanodet_plus_320.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + NanoDetPlus(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + /// Get model's name + std::string ModelName() const { return "nanodet"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.35 \param[in] nms_iou_threshold + * iou threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.35f, + float nms_iou_threshold = 0.5f); + + /*! @brief + Argument for image preprocessing step, tuple of input size (width, height), + default (320, 320) + */ + std::vector size; + // padding value, size should be the same as channels + std::vector padding_value; + // keep aspect ratio or not when perform resize operation. + // This option is set as `false` by default in NanoDet-Plus + bool keep_ratio; + // downsample strides for NanoDet-Plus to generate anchors, + // will take (8, 16, 32, 64) as default values + std::vector downsample_strides; + // for offseting the boxes by classes when using NMS, default 4096 + float max_wh; + /*! @brief + Argument for image postprocessing step, reg_max for GFL regression, default 7 + */ + int reg_max; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // RangiLyu/nanodet official 'export_onnx.py' script will export static ONNX + // by default. + // This value will auto check by ultrainfer after the internal Runtime + // initialized. + bool is_dynamic_input_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus_pybind.cc new file mode 100755 index 0000000000..70364a7269 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/nanodet_plus_pybind.cc @@ -0,0 +1,40 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/pybind/main.h"
+
+namespace ultrainfer {
+void BindNanoDetPlus(pybind11::module &m) {
+  pybind11::class_<vision::detection::NanoDetPlus, UltraInferModel>(
+      m, "NanoDetPlus")
+      .def(pybind11::init<std::string, std::string, RuntimeOption,
+                          ModelFormat>())
+      .def("predict",
+           [](vision::detection::NanoDetPlus &self, pybind11::array &data,
+              float conf_threshold, float nms_iou_threshold) {
+             auto mat = PyArrayToCvMat(data);
+             vision::DetectionResult res;
+             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
+             return res;
+           })
+      .def_readwrite("size", &vision::detection::NanoDetPlus::size)
+      .def_readwrite("padding_value",
+                     &vision::detection::NanoDetPlus::padding_value)
+      .def_readwrite("keep_ratio", &vision::detection::NanoDetPlus::keep_ratio)
+      .def_readwrite("downsample_strides",
+                     &vision::detection::NanoDetPlus::downsample_strides)
+      .def_readwrite("max_wh", &vision::detection::NanoDetPlus::max_wh)
+      .def_readwrite("reg_max", &vision::detection::NanoDetPlus::reg_max);
+}
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/model.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/model.h
new file mode 100755
index 0000000000..53e2d6a963
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/model.h
@@ -0,0 +1,104 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
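For orientation (not part of the patch): the header below only adds thin wrappers around RKYOLO that pre-set the per-model anchors and anchors-per-branch. A minimal C++ usage sketch for RKYOLOV5 follows; the umbrella header path, RuntimeOption::UseRKNPU2(), Initialized() and DetectionResult::Str() are assumed by analogy with the rest of this library, and the .rknn/.jpg paths are placeholders.

#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"  // assumed umbrella header

int main() {
  ultrainfer::RuntimeOption option;
  option.UseRKNPU2();  // assumed helper; RKYOLO only lists Backend::RKNPU2 as valid
  ultrainfer::vision::detection::RKYOLOV5 model(
      "./yolov5.rknn", option, ultrainfer::ModelFormat::RKNN);  // placeholder path
  if (!model.Initialized()) {  // assumed helper from the base model class
    std::cerr << "Failed to initialize RKYOLOV5." << std::endl;
    return -1;
  }
  cv::Mat im = cv::imread("./test.jpg");  // placeholder image
  ultrainfer::vision::DetectionResult result;
  if (!model.Predict(im, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << result.Str() << std::endl;  // assumed: prints boxes, scores, label_ids
  return 0;
}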
+
+#pragma once
+#include "ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h"
+namespace ultrainfer {
+namespace vision {
+namespace detection {
+
+class ULTRAINFER_DECL RKYOLOV5 : public RKYOLO {
+public:
+  /** \brief Set path of model file and the configuration of runtime
+   *
+   * \param[in] model_file Path of model file, e.g ./model.rknn
+   * \param[in] custom_option RuntimeOption for inference, the default will use
+   *            cpu, and choose the backend defined in `valid_rknpu_backends`
+   * \param[in] model_format Model format of the loaded model, default is RKNN
+   *            format
+   */
+  RKYOLOV5(const std::string &model_file,
+           const RuntimeOption &custom_option = RuntimeOption(),
+           const ModelFormat &model_format = ModelFormat::RKNN)
+      : RKYOLO(model_file, custom_option, model_format) {
+    valid_cpu_backends = {};
+    valid_gpu_backends = {};
+    valid_rknpu_backends = {Backend::RKNPU2};
+    std::vector<int> anchors = {10, 13, 16, 30, 33, 23, 30, 61, 62,
+                                45, 59, 119, 116, 90, 156, 198, 373, 326};
+    int anchor_per_branch_ = 3;
+    GetPostprocessor().SetAnchor(anchors);
+    GetPostprocessor().SetAnchorPerBranch(anchor_per_branch_);
+  }
+
+  virtual std::string ModelName() const { return "RKYOLOV5"; }
+};
+
+class ULTRAINFER_DECL RKYOLOV7 : public RKYOLO {
+public:
+  /** \brief Set path of model file and the configuration of runtime
+   *
+   * \param[in] model_file Path of model file, e.g ./model.rknn
+   * \param[in] custom_option RuntimeOption for inference, the default will use
+   *            cpu, and choose the backend defined in `valid_rknpu_backends`
+   * \param[in] model_format Model format of the loaded model, default is RKNN
+   *            format
+   */
+  RKYOLOV7(const std::string &model_file,
+           const RuntimeOption &custom_option = RuntimeOption(),
+           const ModelFormat &model_format = ModelFormat::RKNN)
+      : RKYOLO(model_file, custom_option, model_format) {
+    valid_cpu_backends = {};
+    valid_gpu_backends = {};
+    valid_rknpu_backends = {Backend::RKNPU2};
+    std::vector<int> anchors = {12, 16, 19, 36, 40, 28, 36, 75, 76,
+                                55, 72, 146, 142, 110, 192, 243, 459, 401};
+    int anchor_per_branch_ = 3;
+    GetPostprocessor().SetAnchor(anchors);
+    GetPostprocessor().SetAnchorPerBranch(anchor_per_branch_);
+  }
+
+  virtual std::string ModelName() const { return "RKYOLOV7"; }
+};
+
+class ULTRAINFER_DECL RKYOLOX : public RKYOLO {
+public:
+  /** \brief Set path of model file and the configuration of runtime
+   *
+   * \param[in] model_file Path of model file, e.g ./model.rknn
+   * \param[in] custom_option RuntimeOption for inference, the default will use
+   *            cpu, and choose the backend defined in `valid_rknpu_backends`
+   * \param[in] model_format Model format of the loaded model, default is RKNN
+   *            format
+   */
+  RKYOLOX(const std::string &model_file,
+          const RuntimeOption &custom_option = RuntimeOption(),
+          const ModelFormat &model_format = ModelFormat::RKNN)
+      : RKYOLO(model_file, custom_option, model_format) {
+    valid_cpu_backends = {};
+    valid_gpu_backends = {};
+    valid_rknpu_backends = {Backend::RKNPU2};
+    std::vector<int> anchors = {1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                1, 1, 1, 1, 1, 1, 1, 1, 1};
+    int anchor_per_branch_ = 1;
+    GetPostprocessor().SetAnchor(anchors);
+    GetPostprocessor().SetAnchorPerBranch(anchor_per_branch_);
+  }
+
+  virtual std::string ModelName() const { return "RKYOLOX"; }
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.cc
b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.cc new file mode 100755 index 0000000000..ae566ca4a9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.cc @@ -0,0 +1,212 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +RKYOLOPostprocessor::RKYOLOPostprocessor() {} + +bool RKYOLOPostprocessor::Run(const std::vector &tensors, + std::vector *results) { + results->resize(tensors[0].shape[0]); + for (int num = 0; num < tensors[0].shape[0]; ++num) { + int validCount = 0; + std::vector filterBoxes; + std::vector boxesScore; + std::vector classId; + for (int i = 0; i < tensors.size(); i++) { + auto tensor_shape = tensors[i].shape; + auto skip_num = std::accumulate(tensor_shape.begin(), tensor_shape.end(), + 1, std::multiplies()); + int skip_address = num * skip_num; + int stride = strides_[i]; + int grid_h = height_ / stride; + int grid_w = width_ / stride; + int *anchor = &(anchors_.data()[i * 2 * anchor_per_branch_]); + if (tensors[i].dtype == FDDataType::FP32) { + validCount = validCount + + ProcessFP16((float *)tensors[i].Data() + skip_address, + anchor, grid_h, grid_w, stride, filterBoxes, + boxesScore, classId, conf_threshold_); + } else { + FDERROR << "RKYOLO Only Support FP32 Model." + << "But the result's type is " << Str(tensors[i].dtype) + << std::endl; + } + } + + // no object detect + if (validCount <= 0) { + FDINFO << "The number of object detect is 0." << std::endl; + return true; + } + + std::vector indexArray; + for (int i = 0; i < validCount; ++i) { + indexArray.push_back(i); + } + + QuickSortIndiceInverse(boxesScore, 0, validCount - 1, indexArray); + + if (anchor_per_branch_ == 3) { + NMS(validCount, filterBoxes, classId, indexArray, nms_threshold_, false); + } else if (anchor_per_branch_ == 1) { + NMS(validCount, filterBoxes, classId, indexArray, nms_threshold_, true); + } else { + FDERROR << "anchor_per_branch_ only support 3 or 1." 
<< std::endl; + return false; + } + + int last_count = 0; + (*results)[num].Clear(); + (*results)[num].Reserve(validCount); + + /* box valid detect target */ + for (int i = 0; i < validCount; ++i) { + if (indexArray[i] == -1 || boxesScore[i] < conf_threshold_ || + last_count >= obj_num_bbox_max_size) { + continue; + } + int n = indexArray[i]; + float x1 = filterBoxes[n * 4 + 0]; + float y1 = filterBoxes[n * 4 + 1]; + float x2 = x1 + filterBoxes[n * 4 + 2]; + float y2 = y1 + filterBoxes[n * 4 + 3]; + int id = classId[n]; + (*results)[num].boxes.emplace_back(std::array{ + (float)((Clamp(x1, 0, width_) - pad_hw_values_[num][1] / 2) / + scale_[num]), + (float)((Clamp(y1, 0, height_) - pad_hw_values_[num][0] / 2) / + scale_[num]), + (float)((Clamp(x2, 0, width_) - pad_hw_values_[num][1] / 2) / + scale_[num]), + (float)((Clamp(y2, 0, height_) - pad_hw_values_[num][0] / 2) / + scale_[0])}); + (*results)[num].label_ids.push_back(id); + (*results)[num].scores.push_back(boxesScore[i]); + last_count++; + } + } + return true; +} + +int RKYOLOPostprocessor::ProcessFP16(float *input, int *anchor, int grid_h, + int grid_w, int stride, + std::vector &boxes, + std::vector &boxScores, + std::vector &classId, + float threshold) { + + int validCount = 0; + int grid_len = grid_h * grid_w; + // float thres_sigmoid = threshold; + for (int a = 0; a < anchor_per_branch_; a++) { + for (int i = 0; i < grid_h; i++) { + for (int j = 0; j < grid_w; j++) { + float box_confidence = + input[(prob_box_size_ * a + 4) * grid_len + i * grid_w + j]; + if (box_confidence >= threshold) { + int offset = (prob_box_size_ * a) * grid_len + i * grid_w + j; + float *in_ptr = input + offset; + + float maxClassProbs = in_ptr[5 * grid_len]; + int maxClassId = 0; + for (int k = 1; k < obj_class_num_; ++k) { + float prob = in_ptr[(5 + k) * grid_len]; + if (prob > maxClassProbs) { + maxClassId = k; + maxClassProbs = prob; + } + } + float box_conf_f32 = (box_confidence); + float class_prob_f32 = (maxClassProbs); + float limit_score = 0; + if (anchor_per_branch_ == 1) { + limit_score = class_prob_f32; + } else { + limit_score = box_conf_f32 * class_prob_f32; + } + if (limit_score > conf_threshold_) { + float box_x, box_y, box_w, box_h; + if (anchor_per_branch_ == 1) { + box_x = *in_ptr; + box_y = (in_ptr[grid_len]); + box_w = exp(in_ptr[2 * grid_len]) * stride; + box_h = exp(in_ptr[3 * grid_len]) * stride; + } else { + box_x = *in_ptr * 2.0 - 0.5; + box_y = (in_ptr[grid_len]) * 2.0 - 0.5; + box_w = (in_ptr[2 * grid_len]) * 2.0; + box_h = (in_ptr[3 * grid_len]) * 2.0; + box_w *= box_w; + box_h *= box_h; + } + box_x = (box_x + j) * (float)stride; + box_y = (box_y + i) * (float)stride; + box_w *= (float)anchor[a * 2]; + box_h *= (float)anchor[a * 2 + 1]; + box_x -= (box_w / 2.0); + box_y -= (box_h / 2.0); + + boxes.push_back(box_x); + boxes.push_back(box_y); + boxes.push_back(box_w); + boxes.push_back(box_h); + boxScores.push_back(box_conf_f32 * class_prob_f32); + classId.push_back(maxClassId); + validCount++; + } + } + } + } + } + return validCount; +} + +int RKYOLOPostprocessor::QuickSortIndiceInverse(std::vector &input, + int left, int right, + std::vector &indices) { + float key; + int key_index; + int low = left; + int high = right; + if (left < right) { + key_index = indices[left]; + key = input[left]; + while (low < high) { + while (low < high && input[high] <= key) { + high--; + } + input[low] = input[high]; + indices[low] = indices[high]; + while (low < high && input[low] >= key) { + low++; + } + input[high] = input[low]; + 
indices[high] = indices[low]; + } + input[low] = key; + indices[low] = key_index; + QuickSortIndiceInverse(input, left, low - 1, indices); + QuickSortIndiceInverse(input, low + 1, right, indices); + } + return low; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h new file mode 100755 index 0000000000..9329bf4155 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h @@ -0,0 +1,115 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/detection/contrib/rknpu2/utils.h" +#include +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief Postprocessor object for YOLOv5 serials model. + */ +class ULTRAINFER_DECL RKYOLOPostprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv5 serials model + */ + RKYOLOPostprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results); + + /// Set nms_threshold, default 0.45 + void SetNMSThreshold(float nms_threshold) { nms_threshold_ = nms_threshold; } + + /// Set conf_threshold, default 0.25 + void SetConfThreshold(float conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.25 + const float GetConfThreshold() { return conf_threshold_; } + + /// Get nms_threshold, default 0.45 + const float GetNMSThreshold() { return nms_threshold_; } + + /// Set height and weight + void SetHeightAndWeight(int height, int width) { + height_ = height; + width_ = width; + } + + /// Set pad_hw_values + void SetPadHWValues(const std::vector> &pad_hw_values) { + pad_hw_values_ = pad_hw_values; + } + + /// Set scale + void SetScale(const std::vector &scale) { scale_ = scale; } + + /// Get Anchor + const std::vector &GetAnchor() { return anchors_; } + + /// Set Anchor + void SetAnchor(const std::vector &anchors) { anchors_ = anchors; } + + void SetAnchorPerBranch(int anchor_per_branch) { + anchor_per_branch_ = anchor_per_branch; + } + + /// Set the number of class + void SetClassNum(int num) { + obj_class_num_ = num; + prob_box_size_ = obj_class_num_ + 5; + } + /// Get the number of class + int GetClassNum() { return obj_class_num_; } + +private: + std::vector anchors_ = {10, 13, 16, 30, 33, 23, 30, 61, 62, + 45, 59, 119, 116, 90, 156, 198, 373, 326}; + int strides_[3] = {8, 16, 32}; + int 
height_ = 0; + int width_ = 0; + int anchor_per_branch_ = 0; + + int ProcessFP16(float *input, int *anchor, int grid_h, int grid_w, int stride, + std::vector &boxes, std::vector &boxScores, + std::vector &classId, float threshold); + // Model + int QuickSortIndiceInverse(std::vector &input, int left, int right, + std::vector &indices); + + // post_process values + std::vector> pad_hw_values_; + std::vector scale_; + float nms_threshold_ = 0.45; + float conf_threshold_ = 0.25; + int prob_box_size_ = 85; + int obj_class_num_ = 80; + int obj_num_bbox_max_size = 200; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.cc new file mode 100755 index 0000000000..3aeb302767 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.cc @@ -0,0 +1,109 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +RKYOLOPreprocessor::RKYOLOPreprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = true; + stride_ = 32; + max_wh_ = 7680.0; +} + +void RKYOLOPreprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + scale_.push_back(scale); + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + + pad_hw_values_.push_back({pad_h, pad_w}); + + if (std::fabs(scale - 1.0f) > 1e-06) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool RKYOLOPreprocessor::Preprocess(FDMat *mat, FDTensor *output) { + // RKYOLO's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + BGR2RGB::Run(mat); + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, h, w, c + return true; +} + +bool RKYOLOPreprocessor::Run(std::vector *images, + std::vector *outputs) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." 
+ << std::endl; + return false; + } + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h new file mode 100755 index 0000000000..2da7d78b0a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief Preprocessor object for YOLOv5 serials model. + */ +class ULTRAINFER_DECL RKYOLOPreprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv5 serials model + */ + RKYOLOPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingValue(const std::vector &padding_value) { + padding_value_ = padding_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingValue() const { return padding_value_; } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + + std::vector> GetPadHWValues() const { + return pad_hw_values_; + } + std::vector GetScale() const { return scale_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output); + + void LetterBox(FDMat *mat); + + // target size, tuple of (width, height), default size = {640, 640} + std::vector size_; + + // 
padding value, size should be the same as channels + std::vector padding_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; + + std::vector> pad_hw_values_; + std::vector scale_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.cc new file mode 100755 index 0000000000..fec34e74e4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +RKYOLO::RKYOLO(const std::string &model_file, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format) { + if (model_format == ModelFormat::RKNN) { + valid_cpu_backends = {}; + valid_gpu_backends = {}; + valid_rknpu_backends = {Backend::RKNPU2}; + } else { + FDERROR << "RKYOLO Only Support run in RKNPU2" << std::endl; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + initialized = Initialize(); +} + +bool RKYOLO::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + auto size = GetPreprocessor().GetSize(); + GetPostprocessor().SetHeightAndWeight(size[0], size[1]); + return true; +} + +bool RKYOLO::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool RKYOLO::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." 
<< std::endl; + return false; + } + + auto pad_hw_values_ = preprocessor_.GetPadHWValues(); + postprocessor_.SetPadHWValues(preprocessor_.GetPadHWValues()); + postprocessor_.SetScale(preprocessor_.GetScale()); + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h new file mode 100755 index 0000000000..9eafe1a6de --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo.h @@ -0,0 +1,65 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/rknpu2/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/rknpu2/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +class ULTRAINFER_DECL RKYOLO : public UltraInferModel { +public: + RKYOLO(const std::string &model_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::RKNN); + + std::string ModelName() const { return "RKYOLO"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv5 + RKYOLOPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv5 + RKYOLOPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + RKYOLOPreprocessor preprocessor_; + RKYOLOPostprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo_pybind.cc new file mode 100755 index 0000000000..bd7bd50c16 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/rkyolo_pybind.cc @@ -0,0 +1,163 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindRKYOLO(pybind11::module &m) { + pybind11::class_(m, + "RKYOLOPreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::RKYOLOPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "PaddleClasPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def_property("size", &vision::detection::RKYOLOPreprocessor::GetSize, + &vision::detection::RKYOLOPreprocessor::SetSize) + .def_property("padding_value", + &vision::detection::RKYOLOPreprocessor::GetPaddingValue, + &vision::detection::RKYOLOPreprocessor::SetPaddingValue) + .def_property("is_scale_up", + &vision::detection::RKYOLOPreprocessor::GetScaleUp, + &vision::detection::RKYOLOPreprocessor::SetScaleUp); + + pybind11::class_( + m, "RKYOLOPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::RKYOLOPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "RKYOLOV5Postprocessor."); + } + return results; + }) + .def("run", + [](vision::detection::RKYOLOPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "RKYOLOV5Postprocessor."); + } + return results; + }) + .def("set_anchor", [](vision::detection::RKYOLOPostprocessor &self, + std::vector &data) { self.SetAnchor(data); }) + .def_property("conf_threshold", + &vision::detection::RKYOLOPostprocessor::GetConfThreshold, + &vision::detection::RKYOLOPostprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::detection::RKYOLOPostprocessor::GetNMSThreshold, + &vision::detection::RKYOLOPostprocessor::SetNMSThreshold) + .def_property("class_num", + &vision::detection::RKYOLOPostprocessor::GetClassNum, + &vision::detection::RKYOLOPostprocessor::SetClassNum); + + pybind11::class_(m, "RKYOLOV5") + .def(pybind11::init()) + .def("predict", + [](vision::detection::RKYOLOV5 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::RKYOLOV5 &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + 
&vision::detection::RKYOLOV5::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::RKYOLOV5::GetPostprocessor); + + pybind11::class_(m, "RKYOLOX") + .def(pybind11::init()) + .def("predict", + [](vision::detection::RKYOLOX &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::RKYOLOX &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::RKYOLOX::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::RKYOLOX::GetPostprocessor); + + pybind11::class_(m, "RKYOLOV7") + .def(pybind11::init()) + .def("predict", + [](vision::detection::RKYOLOV7 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::RKYOLOV7 &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::RKYOLOV7::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::RKYOLOV7::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.cc new file mode 100755 index 0000000000..7e534ac14c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/detection/contrib/rknpu2/utils.h" +namespace ultrainfer { +namespace vision { +namespace detection { +float Clamp(float val, int min, int max) { + return val > min ? (val < max ? val : max) : min; +} +static float CalculateOverlap(float xmin0, float ymin0, float xmax0, + float ymax0, float xmin1, float ymin1, + float xmax1, float ymax1) { + float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0); + float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0); + float i = w * h; + float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i; + return u <= 0.f ? 
0.f : (i / u); +} + +int NMS(int valid_count, std::vector &output_locations, + std::vector &class_id, std::vector &order, float threshold, + bool class_agnostic) { + for (int i = 0; i < valid_count; ++i) { + if (order[i] == -1) { + continue; + } + int n = order[i]; + for (int j = i + 1; j < valid_count; ++j) { + int m = order[j]; + if (m == -1) { + continue; + } + + if (!class_agnostic && class_id[n] != class_id[m]) { + continue; + } + + float xmin0 = output_locations[n * 4 + 0]; + float ymin0 = output_locations[n * 4 + 1]; + float xmax0 = output_locations[n * 4 + 0] + output_locations[n * 4 + 2]; + float ymax0 = output_locations[n * 4 + 1] + output_locations[n * 4 + 3]; + + float xmin1 = output_locations[m * 4 + 0]; + float ymin1 = output_locations[m * 4 + 1]; + float xmax1 = output_locations[m * 4 + 0] + output_locations[m * 4 + 2]; + float ymax1 = output_locations[m * 4 + 1] + output_locations[m * 4 + 3]; + + float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, + xmax1, ymax1); + + if (iou > threshold) { + order[j] = -1; + } + } + } + return 0; +} +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.h new file mode 100755 index 0000000000..c357212770 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/rknpu2/utils.h @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include +#include +#include + +namespace ultrainfer { +namespace vision { +namespace detection { +float Clamp(float val, int min, int max); +int NMS(int valid_count, std::vector &output_locations, + std::vector &class_id, std::vector &order, float threshold, + bool class_agnostic); + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.cc new file mode 100755 index 0000000000..a91ce09886 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.cc @@ -0,0 +1,254 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
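Before the ScaledYOLOv4 implementation below, a note on its LetterBox step: the image is scaled to fit the target size and the remainder is padded half on each side (or only up to a multiple of stride when _auto is set). A self-contained sketch of that arithmetic, assuming a 1280x720 input and the default 640x640 target (illustration only, not part of the patch):

#include <algorithm>
#include <cmath>
#include <iostream>

// Standalone sketch of the letterbox arithmetic used by LetterBox() below.
// For a 1280x720 image and a 640x640 target: scale = min(640/720, 640/1280)
// = 0.5, resize -> 640x360, pad_h = 280, split as 140 px top and bottom.
int main() {
  int ipt_w = 1280, ipt_h = 720;
  int target_w = 640, target_h = 640;
  float scale = std::min(target_h * 1.0f / ipt_h, target_w * 1.0f / ipt_w);
  int resize_w = static_cast<int>(std::round(ipt_w * scale));
  int resize_h = static_cast<int>(std::round(ipt_h * scale));
  int pad_w = target_w - resize_w;
  int pad_h = target_h - resize_h;
  int top = static_cast<int>(std::round(pad_h / 2.0f - 0.1f));
  int bottom = static_cast<int>(std::round(pad_h / 2.0f + 0.1f));
  std::cout << "scale=" << scale << " resize=" << resize_w << "x" << resize_h
            << " pad_w=" << pad_w << " pad top/bottom=" << top << "/" << bottom
            << std::endl;
  return 0;
}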
+ +#include "ultrainfer/vision/detection/contrib/scaledyolov4.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +void ScaledYOLOv4::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +ScaledYOLOv4::ScaledYOLOv4(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool ScaledYOLOv4::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + max_wh = 7680.0; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +bool ScaledYOLOv4::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // process after image load + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // ScaledYOLOv4's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. 
HWC->CHW + ScaledYOLOv4::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + BGR2RGB::Run(mat); + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool ScaledYOLOv4::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + // 和 LetterBox中_auto=true的处理逻辑对应 + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = 
std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool ScaledYOLOv4::Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.h new file mode 100755 index 0000000000..ba160ca702 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4.h @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief ScaledYOLOv4 model object used when to load a ScaledYOLOv4 model + * exported by ScaledYOLOv4. + */ +class ULTRAINFER_DECL ScaledYOLOv4 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./scaled_yolov4.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + + ScaledYOLOv4(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + virtual std::string ModelName() const { return "ScaledYOLOv4"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \param[in] nms_iou_threshold + * iou threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, + float nms_iou_threshold = 0.5); + + /*! @brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + // for offseting the boxes by classes when using NMS + float max_wh; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + void LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill = false, bool scale_up = true, + int stride = 32); + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This + // value will + // auto check by ultrainfer after the internal Runtime already initialized + bool is_dynamic_input_; +}; +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4_pybind.cc new file mode 100755 index 0000000000..f3961bbb11 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/scaledyolov4_pybind.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindScaledYOLOv4(pybind11::module &m) { + pybind11::class_( + m, "ScaledYOLOv4") + .def(pybind11::init()) + .def("predict", + [](vision::detection::ScaledYOLOv4 &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def_readwrite("size", &vision::detection::ScaledYOLOv4::size) + .def_readwrite("padding_value", + &vision::detection::ScaledYOLOv4::padding_value) + .def_readwrite("is_mini_pad", + &vision::detection::ScaledYOLOv4::is_mini_pad) + .def_readwrite("is_no_pad", &vision::detection::ScaledYOLOv4::is_no_pad) + .def_readwrite("is_scale_up", + &vision::detection::ScaledYOLOv4::is_scale_up) + .def_readwrite("stride", &vision::detection::ScaledYOLOv4::stride) + .def_readwrite("max_wh", &vision::detection::ScaledYOLOv4::max_wh); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.cc new file mode 100755 index 0000000000..11e945e0d2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.cc @@ -0,0 +1,252 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
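For reference, a minimal C++ usage sketch of the ScaledYOLOv4 API added above. The umbrella header, model path and image path are placeholders rather than files introduced by this patch, and Initialized()/Str() are assumed to be inherited helpers on the base model and result types:

#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"  // hypothetical umbrella header

int main() {
  ultrainfer::vision::detection::ScaledYOLOv4 model("scaled_yolov4.onnx");
  if (!model.Initialized()) {  // assumed base-class helper
    std::cerr << "Failed to initialize model." << std::endl;
    return -1;
  }
  cv::Mat im = cv::imread("test.jpg");
  ultrainfer::vision::DetectionResult res;
  if (!model.Predict(&im, &res, /*conf_threshold=*/0.25f, /*nms_iou_threshold=*/0.5f)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << res.Str() << std::endl;  // boxes, scores and label_ids; Str() assumed
  return 0;
}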
+ +#include "ultrainfer/vision/detection/contrib/yolor.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +void YOLOR::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOR::YOLOR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOR::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + max_wh = 7680.0; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +bool YOLOR::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // process after image load + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // yolor's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. 
HWC->CHW + YOLOR::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + BGR2RGB::Run(mat); + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool YOLOR::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + 
result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOR::Predict(cv::Mat *im, DetectionResult *result, float conf_threshold, + float nms_iou_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.h new file mode 100755 index 0000000000..335c80391a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor.h @@ -0,0 +1,101 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOR model object used when to load a YOLOR model exported by YOLOR. + */ +class ULTRAINFER_DECL YOLOR : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./yolor.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOR(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + virtual std::string ModelName() const { return "YOLOR"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread() + * \param[in] result The output detection result will be writen to this + * structure \param[in] conf_threshold confidence threashold for + * postprocessing, default is 0.25 \param[in] nms_iou_threshold iou threashold + * for NMS, default is 0.5 \return true if the prediction successed, otherwise + * false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, + float nms_iou_threshold = 0.5); + + /*! @brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + // for offseting the boxes by classes when using NMS + float max_wh; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + void LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill = false, bool scale_up = true, + int stride = 32); + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This + // value will + // auto check by ultrainfer after the internal Runtime already initialized. + bool is_dynamic_input_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor_pybind.cc new file mode 100755 index 0000000000..2e226f65a9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolor_pybind.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
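To make the LetterBox flags documented above concrete, here is a small standalone sketch (illustrative numbers only, not part of the patch) of the padding arithmetic, including the is_mini_pad reduction:

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical 1280x720 input, default target size {640, 640}, stride 32.
  int in_w = 1280, in_h = 720;
  int target_w = 640, target_h = 640, stride = 32;
  float scale = std::min(target_h * 1.0f / in_h, target_w * 1.0f / in_w);  // 0.5
  int resize_w = static_cast<int>(std::round(in_w * scale));  // 640
  int resize_h = static_cast<int>(std::round(in_h * scale));  // 360
  int pad_w = target_w - resize_w;  // 0
  int pad_h = target_h - resize_h;  // 280 before any mini-pad reduction
  bool is_mini_pad = true;
  if (is_mini_pad) {   // only pad up to the next multiple of stride
    pad_w %= stride;   // 0
    pad_h %= stride;   // 280 % 32 = 24
  }
  // The pad is later split evenly into top/bottom (and left/right) halves.
  std::printf("resize=%dx%d pad_w=%d pad_h=%d\n", resize_w, resize_h, pad_w, pad_h);
  return 0;
}

With is_no_pad the image is instead resized straight to the target size, and when is_scale_up is false the scale is clamped to at most 1.0 so small images are never enlarged.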
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOR(pybind11::module &m) { + pybind11::class_(m, "YOLOR") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOR &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def_readwrite("size", &vision::detection::YOLOR::size) + .def_readwrite("padding_value", &vision::detection::YOLOR::padding_value) + .def_readwrite("is_mini_pad", &vision::detection::YOLOR::is_mini_pad) + .def_readwrite("is_no_pad", &vision::detection::YOLOR::is_no_pad) + .def_readwrite("is_scale_up", &vision::detection::YOLOR::is_scale_up) + .def_readwrite("stride", &vision::detection::YOLOR::stride) + .def_readwrite("max_wh", &vision::detection::YOLOR::max_wh); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.cc new file mode 100755 index 0000000000..1f19d0dba2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.cc @@ -0,0 +1,140 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5Postprocessor::YOLOv5Postprocessor() { + conf_threshold_ = 0.25; + nms_threshold_ = 0.5; + multi_label_ = true; + max_wh_ = 7680.0; +} + +bool YOLOv5Postprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = tensors[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + if (multi_label_) { + (*results)[bs].Reserve(tensors[0].shape[1] * (tensors[0].shape[2] - 5)); + } else { + (*results)[bs].Reserve(tensors[0].shape[1]); + } + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float *data = reinterpret_cast(tensors[0].Data()) + + bs * tensors[0].shape[1] * tensors[0].shape[2]; + for (size_t i = 0; i < tensors[0].shape[1]; ++i) { + int s = i * tensors[0].shape[2]; + float confidence = data[s + 4]; + if (multi_label_) { + for (size_t j = 5; j < tensors[0].shape[2]; ++j) { + confidence = data[s + 4]; + const float *class_score = data + s + j; + confidence *= (*class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, class_score); + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + } else { + const float *max_class_score = + std::max_element(data + s + 5, data + s + tensors[0].shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + int32_t label_id = ((*results)[bs].label_ids)[i]; + // clip box + (*results)[bs].boxes[i][0] = + (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = + (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = + (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = + (*results)[bs].boxes[i][3] - max_wh_ * label_id; + (*results)[bs].boxes[i][0] = + std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = + std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = + std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = + std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w); + (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w); + (*results)[bs].boxes[i][3] = 
std::min((*results)[bs].boxes[i][3], ipt_h); + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.h new file mode 100755 index 0000000000..ac437c6ffa --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/postprocessor.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Postprocessor object for YOLOv5 serials model. + */ +class ULTRAINFER_DECL YOLOv5Postprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv5 serials model + */ + YOLOv5Postprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.25 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.25 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.5 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.5 + float GetNMSThreshold() const { return nms_threshold_; } + + /// Set multi_label, set true for eval, default true + void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } + + /// Get multi_label, default true + bool GetMultiLabel() const { return multi_label_; } + +protected: + float conf_threshold_; + float nms_threshold_; + bool multi_label_; + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.cc new file mode 100755 index 0000000000..7f012f09c9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.cc @@ -0,0 +1,119 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
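One detail worth spelling out for the postprocessor above: boxes are shifted by label_id * max_wh_ before NMS so that boxes of different classes can never overlap, which lets a single class-agnostic utils::NMS pass behave like per-class NMS; the same offset is subtracted again before the boxes are rescaled and clipped. A tiny sketch of the idea with illustrative values:

#include <cstdio>

int main() {
  // max_wh_ is chosen larger than any letterboxed coordinate, so every class
  // gets its own non-overlapping coordinate band during the single NMS pass.
  const float max_wh = 7680.0f;
  const int label_id = 3;
  const float x1 = 100.0f;
  const float shifted = x1 + label_id * max_wh;        // 23140: band of class 3
  const float restored = shifted - label_id * max_wh;  // 100 again after NMS
  std::printf("shifted=%.1f restored=%.1f\n", shifted, restored);
  return 0;
}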
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5Preprocessor::YOLOv5Preprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = true; + stride_ = 32; + max_wh_ = 7680.0; +} + +void YOLOv5Preprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + if (std::fabs(scale - 1.0f) > 1e-06) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv5Preprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + // yolov5's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool YOLOv5Preprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.h new file mode 100755 index 0000000000..47331719be --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/preprocessor.h @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Preprocessor object for YOLOv5 serials model. + */ +class ULTRAINFER_DECL YOLOv5Preprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv5 serials model + */ + YOLOv5Preprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingValue(const std::vector &padding_value) { + padding_value_ = padding_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingValue() const { return padding_value_; } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + + /// Set is_mini_pad, pad to the minimum rectange + /// which height and width is times of stride + void SetMiniPad(bool is_mini_pad) { is_mini_pad_ = is_mini_pad; } + + /// Get is_mini_pad, default false + bool GetMiniPad() const { return is_mini_pad_; } + + /// Set padding stride, only for mini_pad mode + void SetStride(int stride) { stride_ = stride; } + + /// Get padding stride, default 32 + bool GetStride() const { return stride_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + void LetterBox(FDMat *mat); + + // target size, tuple 
of (width, height), default size = {640, 640} + std::vector size_; + + // padding value, size should be the same as channels + std::vector padding_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.cc new file mode 100755 index 0000000000..61e7998d64 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.cc @@ -0,0 +1,97 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5/yolov5.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5::YOLOv5(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_timvx_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv5::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool YOLOv5::Predict(cv::Mat *im, DetectionResult *result, float conf_threshold, + float nms_threshold) { + postprocessor_.SetConfThreshold(conf_threshold); + postprocessor_.SetNMSThreshold(nms_threshold); + if (!Predict(*im, result)) { + return false; + } + return true; +} + +bool YOLOv5::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv5::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.h new file mode 100755 index 0000000000..f968c5bd78 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5.h @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/yolov5/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/yolov5/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv5 model object used when to load a YOLOv5 model exported by + * YOLOv5. + */ +class ULTRAINFER_DECL YOLOv5 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./yolov5.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv5(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov5"; } + + /** \brief DEPRECATED Predict the detection result for an input image, remove + * at 1.0 version + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \param[in] nms_threshold iou + * threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, float nms_threshold = 0.5); + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv5 + virtual YOLOv5Preprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv5 + virtual YOLOv5Postprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + YOLOv5Preprocessor preprocessor_; + YOLOv5Postprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5_pybind.cc new file mode 100755 index 0000000000..d81c13ef8a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
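A minimal usage sketch of the YOLOv5 wrapper declared above. File names and the umbrella header are placeholders, Initialized()/Str() are assumed inherited helpers, and the remaining calls mirror the declarations in this patch (thresholds now live on the postprocessor rather than on Predict):

#include <iostream>
#include <opencv2/opencv.hpp>
#include <vector>
#include "ultrainfer/vision.h"  // hypothetical umbrella header

int main() {
  namespace det = ultrainfer::vision::detection;
  det::YOLOv5 model("yolov5s.onnx");  // placeholder model file
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize model." << std::endl;
    return -1;
  }

  // Configure postprocessing instead of passing thresholds to Predict().
  model.GetPostprocessor().SetConfThreshold(0.3f);
  model.GetPostprocessor().SetNMSThreshold(0.45f);

  std::vector<cv::Mat> imgs = {cv::imread("a.jpg"), cv::imread("b.jpg")};
  std::vector<ultrainfer::vision::DetectionResult> results;
  if (!model.BatchPredict(imgs, &results)) {
    std::cerr << "BatchPredict failed." << std::endl;
    return -1;
  }
  for (const auto &res : results) {
    std::cout << res.Str() << std::endl;
  }
  return 0;
}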
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv5(pybind11::module &m) { + pybind11::class_(m, + "YOLOv5Preprocessor") + .def(pybind11::init<>()) + .def( + "run", + [](vision::detection::YOLOv5Preprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in YOLOv5Preprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize, + &vision::detection::YOLOv5Preprocessor::SetSize) + .def_property("padding_value", + &vision::detection::YOLOv5Preprocessor::GetPaddingValue, + &vision::detection::YOLOv5Preprocessor::SetPaddingValue) + .def_property("is_scale_up", + &vision::detection::YOLOv5Preprocessor::GetScaleUp, + &vision::detection::YOLOv5Preprocessor::SetScaleUp) + .def_property("is_mini_pad", + &vision::detection::YOLOv5Preprocessor::GetMiniPad, + &vision::detection::YOLOv5Preprocessor::SetMiniPad) + .def_property("stride", &vision::detection::YOLOv5Preprocessor::GetStride, + &vision::detection::YOLOv5Preprocessor::SetStride); + + pybind11::class_( + m, "YOLOv5Postprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::YOLOv5Postprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "YOLOv5Postprocessor."); + } + return results; + }) + .def("run", + [](vision::detection::YOLOv5Postprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "YOLOv5Postprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::detection::YOLOv5Postprocessor::GetConfThreshold, + &vision::detection::YOLOv5Postprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::detection::YOLOv5Postprocessor::GetNMSThreshold, + &vision::detection::YOLOv5Postprocessor::SetNMSThreshold) + .def_property("multi_label", + &vision::detection::YOLOv5Postprocessor::GetMultiLabel, + &vision::detection::YOLOv5Postprocessor::SetMultiLabel); + + pybind11::class_(m, "YOLOv5") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv5 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::YOLOv5 &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::YOLOv5::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::YOLOv5::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.cc 
b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.cc new file mode 100755 index 0000000000..f9574ddf4c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.cc @@ -0,0 +1,471 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5lite.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" +#ifdef WITH_GPU +#include "ultrainfer/vision/utils/cuda_utils.h" +#endif // WITH_GPU + +namespace ultrainfer { +namespace vision { +namespace detection { + +void YOLOv5Lite::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +void YOLOv5Lite::GenerateAnchors(const std::vector &size, + const std::vector &downsample_strides, + std::vector *anchors, + int num_anchors) { + // size: tuple of input (width, height) + // downsample_strides: downsample strides in YOLOv5Lite, e.g (8,16,32) + const int width = size[0]; + const int height = size[1]; + for (int i = 0; i < downsample_strides.size(); ++i) { + const int ds = downsample_strides[i]; + int num_grid_w = width / ds; + int num_grid_h = height / ds; + for (int an = 0; an < num_anchors; ++an) { + float anchor_w = anchor_config[i][an * 2]; + float anchor_h = anchor_config[i][an * 2 + 1]; + for (int g1 = 0; g1 < num_grid_h; ++g1) { + for (int g0 = 0; g0 < num_grid_w; ++g0) { + (*anchors).emplace_back(Anchor{g0, g1, ds, anchor_w, anchor_h}); + } + } + } + } +} + +YOLOv5Lite::YOLOv5Lite(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + 
runtime_option.model_file = model_file; + runtime_option.params_file = params_file; +#ifdef WITH_GPU + cudaSetDevice(runtime_option.device_id); + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + cuda_stream_ = reinterpret_cast(stream); + runtime_option.SetExternalStream(cuda_stream_); +#endif // WITH_GPU + initialized = Initialize(); +} + +bool YOLOv5Lite::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + downsample_strides = {8, 16, 32}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + max_wh = 7680.0; + is_decode_exported = false; + anchor_config = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, + {30.0, 61.0, 62.0, 45.0, 59.0, 119.0}, + {116.0, 90.0, 156.0, 198.0, 373.0, 326.0}}; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +YOLOv5Lite::~YOLOv5Lite() { +#ifdef WITH_GPU + if (use_cuda_preprocessing_) { + CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); + CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); + CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); + CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); + } +#endif // WITH_GPU +} + +bool YOLOv5Lite::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // process after image load + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // yolov5lite's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. 
HWC->CHW + YOLOv5Lite::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + BGR2RGB::Run(mat); + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +void YOLOv5Lite::UseCudaPreprocessing(int max_image_size) { +#ifdef WITH_GPU + use_cuda_preprocessing_ = true; + is_scale_up = true; + if (input_img_cuda_buffer_host_ == nullptr) { + // prepare input data cache in GPU pinned memory + CUDA_CHECK(cudaMallocHost((void **)&input_img_cuda_buffer_host_, + max_image_size * 3)); + // prepare input data cache in GPU device memory + CUDA_CHECK(cudaMalloc((void **)&input_img_cuda_buffer_device_, + max_image_size * 3)); + CUDA_CHECK(cudaMalloc((void **)&input_tensor_cuda_buffer_device_, + 3 * size[0] * size[1] * sizeof(float))); + } +#else + FDWARNING << "The UltraInfer didn't compile with WITH_GPU=ON." << std::endl; + use_cuda_preprocessing_ = false; +#endif +} + +bool YOLOv5Lite::CudaPreprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { +#ifdef WITH_GPU + if (is_mini_pad != false || is_no_pad != false || is_scale_up != true) { + FDERROR << "Preprocessing with CUDA is only available when the arguments " + "satisfy (is_mini_pad=false, is_no_pad=false, is_scale_up=true)." + << std::endl; + return false; + } + + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + cudaStream_t stream = reinterpret_cast(cuda_stream_); + int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); + memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); + CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, + input_img_cuda_buffer_host_, src_img_buf_size, + cudaMemcpyHostToDevice, stream)); + utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), + mat->Height(), input_tensor_cuda_buffer_device_, + size[0], size[1], padding_value, stream); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(size[0]), + static_cast(size[1])}; + + output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, + input_tensor_cuda_buffer_device_); + output->device = Device::GPU; + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +#else + FDERROR << "CUDA src code was not enabled." << std::endl; + return false; +#endif // WITH_GPU +} + +bool YOLOv5Lite::PostprocessWithDecode( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + // generate anchors with dowmsample strides + std::vector anchors; + int num_anchors = anchor_config[0].size() / 2; + GenerateAnchors(size, downsample_strides, &anchors, num_anchors); + // infer_result shape might look like (1,n,85=5+80) + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // fetch i-th anchor + float grid0 = static_cast(anchors.at(i).grid0); + float grid1 = static_cast(anchors.at(i).grid1); + float downsample_stride = static_cast(anchors.at(i).stride); + float anchor_w = static_cast(anchors.at(i).anchor_w); + float anchor_h = static_cast(anchors.at(i).anchor_h); + // convert from offsets to [x, y, w, h] + float dx = data[s]; + float dy = data[s + 1]; + float dw = data[s + 2]; + float dh = data[s + 3]; + + float x = (dx * 2.0f - 0.5f + grid0) * downsample_stride; + float y = (dy * 2.0f - 0.5f + grid1) * downsample_stride; + float w = std::pow(dw * 2.0f, 2.0f) * anchor_w; + float h = std::pow(dh * 2.0f, 2.0f) * anchor_h; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + x - w / 2.0f + label_id * max_wh, y - h / 2.0f + label_id * max_wh, + x + w / 2.0f + label_id * max_wh, y + h / 2.0f + label_id * max_wh}); + // label_id * max_wh for multi classes NMS + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv5Lite::Postprocess( + FDTensor &infer_result, 
DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv5Lite::Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (use_cuda_preprocessing_) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + } else { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (is_decode_exported) { + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + } else { + if (!PostprocessWithDecode(reused_output_tensors_[0], result, im_info, + conf_threshold, nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.h new file mode 100755 index 0000000000..ec94fdc808 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite.h @@ -0,0 +1,156 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv5Lite model object used when to load a YOLOv5Lite model exported + * by YOLOv5Lite. + */ +class ULTRAINFER_DECL YOLOv5Lite : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+   *
+   * \param[in] model_file Path of the model file, e.g. ./yolov5lite.onnx
+   * \param[in] params_file Path of the parameter file, e.g.
+   *            ppyoloe/model.pdiparams; if the model format is ONNX, this
+   *            parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default uses the
+   *            CPU and chooses the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX
+   */
+  YOLOv5Lite(const std::string &model_file, const std::string &params_file = "",
+             const RuntimeOption &custom_option = RuntimeOption(),
+             const ModelFormat &model_format = ModelFormat::ONNX);
+
+  ~YOLOv5Lite();
+
+  virtual std::string ModelName() const { return "YOLOv5-Lite"; }
+
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), a 3-D array
+   *            with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this
+   *            structure
+   * \param[in] conf_threshold Confidence threshold for postprocessing,
+   *            default is 0.45
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.25
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(cv::Mat *im, DetectionResult *result,
+                       float conf_threshold = 0.45,
+                       float nms_iou_threshold = 0.25);
+
+  void UseCudaPreprocessing(int max_img_size = 3840 * 2160);
+
+  /*! @brief
+  Argument for the image preprocessing step, tuple of (width, height), which
+  decides the target size after resizing; default size = {640, 640}
+  */
+  std::vector<int> size;
+  // padding value, size should be the same as the number of channels
+  std::vector<float> padding_value;
+  // only pad to the minimum rectangle whose height and width are multiples of
+  // stride
+  bool is_mini_pad;
+  // when is_mini_pad = false and is_no_pad = true,
+  // the image is resized to the set size
+  bool is_no_pad;
+  // if is_scale_up is false, the input image can only be zoomed out,
+  // i.e. the maximum resize scale cannot exceed 1.0
+  bool is_scale_up;
+  // padding stride, used with is_mini_pad
+  int stride;
+  // for offsetting the boxes by class when using NMS
+  float max_wh;
+  // downsample strides used by YOLOv5Lite to generate anchors,
+  // (8, 16, 32) by default; some models also use stride 64
+  std::vector<int> downsample_strides;
+  // anchor parameters; for downsample_strides (8, 16, 32),
+  // each stride has three anchors with width and height
+  std::vector<std::vector<float>> anchor_config;
+  /*! @brief
+  Whether the model_file was exported with the decode module. The official
+  YOLOv5Lite/export.py script exports ONNX files without the decode module.
+  Please set it to 'true' manually if the model file was exported with the
+  decode module.
+  false : ONNX file without decode module.
+  true  : ONNX file with decode module. Default is false.
+  */
+  bool is_decode_exported;
+
+private:
+  // parameters needed by GenerateAnchors to generate anchors when the ONNX
+  // file has no decode module.
+  struct Anchor {
+    int grid0;
+    int grid1;
+    int stride;
+    float anchor_w;
+    float anchor_h;
+  };
+
+  bool Initialize();
+
+  bool Preprocess(Mat *mat, FDTensor *output,
+                  std::map<std::string, std::array<float, 2>> *im_info);
+
+  bool CudaPreprocess(Mat *mat, FDTensor *output,
+                      std::map<std::string, std::array<float, 2>> *im_info);
+
+  bool Postprocess(FDTensor &infer_result, DetectionResult *result,
+                   const std::map<std::string, std::array<float, 2>> &im_info,
+                   float conf_threshold, float nms_iou_threshold);
+
+  // The official YOLOv5Lite/export.py exports ONNX files without the decode
+  // module. This function performs postprocessing for such ONNX files;
+  // set `is_decode_exported = false` to make it take effect.
+  bool PostprocessWithDecode(
+      FDTensor &infer_result, DetectionResult *result,
+      const std::map<std::string, std::array<float, 2>> &im_info,
+      float conf_threshold, float nms_iou_threshold);
+
+  void LetterBox(Mat *mat, const std::vector<int> &size,
+                 const std::vector<float> &color, bool _auto,
+                 bool scale_fill = false, bool scale_up = true,
+                 int stride = 32);
+
+  // generate anchors for decoding when the ONNX file has no decode module.
+  void GenerateAnchors(const std::vector<int> &size,
+                       const std::vector<int> &downsample_strides,
+                       std::vector<Anchor> *anchors, const int num_anchors = 3);
+
+  // Whether to run inference with dynamic shape (e.g. an ONNX model exported
+  // with dynamic shape). When is_dynamic_input_ is 'false', is_mini_pad is
+  // forced to 'false'. This value is checked automatically by ultrainfer after
+  // the internal Runtime has been initialized.
+  bool is_dynamic_input_;
+  // CUDA host buffer for the input image
+  uint8_t *input_img_cuda_buffer_host_ = nullptr;
+  // CUDA device buffer for the input image
+  uint8_t *input_img_cuda_buffer_device_ = nullptr;
+  // CUDA device buffer for the TRT input tensor
+  float *input_tensor_cuda_buffer_device_ = nullptr;
+  // Whether to use CUDA preprocessing
+  bool use_cuda_preprocessing_ = false;
+  // CUDA stream
+  void *cuda_stream_ = nullptr;
+};
+} // namespace detection
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite_pybind.cc
new file mode 100755
index 0000000000..9f1524ef28
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5lite_pybind.cc
@@ -0,0 +1,50 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
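A self-contained sketch of the per-anchor box decode that YOLOv5Lite::PostprocessWithDecode applies above; the offsets, grid cell, stride, and anchor prior below are made-up illustration values, not taken from this patch:

    #include <cmath>
    #include <cstdio>

    int main() {
      // Raw network offsets for one anchor (illustrative values).
      float dx = 0.6f, dy = 0.55f, dw = 0.7f, dh = 0.8f;
      // Grid cell, downsample stride, and anchor prior (illustrative values).
      float grid0 = 10.0f, grid1 = 20.0f, stride = 8.0f;
      float anchor_w = 10.0f, anchor_h = 13.0f;
      // Same decode as PostprocessWithDecode: center from grid, size from prior.
      float x = (dx * 2.0f - 0.5f + grid0) * stride;   // 85.6
      float y = (dy * 2.0f - 0.5f + grid1) * stride;   // 164.8
      float w = std::pow(dw * 2.0f, 2.0f) * anchor_w;  // 19.6
      float h = std::pow(dh * 2.0f, 2.0f) * anchor_h;  // 33.28
      // Printed box [x1, y1, x2, y2] = [75.80, 148.16, 95.40, 181.44]
      std::printf("[%.2f, %.2f, %.2f, %.2f]\n", x - w / 2.0f, y - h / 2.0f,
                  x + w / 2.0f, y + h / 2.0f);
      return 0;
    }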
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv5Lite(pybind11::module &m) { + pybind11::class_(m, + "YOLOv5Lite") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv5Lite &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def("use_cuda_preprocessing", + [](vision::detection::YOLOv5Lite &self, int max_image_size) { + self.UseCudaPreprocessing(max_image_size); + }) + .def_readwrite("size", &vision::detection::YOLOv5Lite::size) + .def_readwrite("padding_value", + &vision::detection::YOLOv5Lite::padding_value) + .def_readwrite("downsample_strides", + &vision::detection::YOLOv5Lite::downsample_strides) + .def_readwrite("is_mini_pad", &vision::detection::YOLOv5Lite::is_mini_pad) + .def_readwrite("is_no_pad", &vision::detection::YOLOv5Lite::is_no_pad) + .def_readwrite("is_scale_up", &vision::detection::YOLOv5Lite::is_scale_up) + .def_readwrite("stride", &vision::detection::YOLOv5Lite::stride) + .def_readwrite("max_wh", &vision::detection::YOLOv5Lite::max_wh) + .def_readwrite("anchor_config", + &vision::detection::YOLOv5Lite::anchor_config) + .def_readwrite("is_decode_exported", + &vision::detection::YOLOv5Lite::is_decode_exported); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.cc new file mode 100755 index 0000000000..da980d2031 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.cc @@ -0,0 +1,217 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5SegPostprocessor::YOLOv5SegPostprocessor() { + conf_threshold_ = 0.25; + nms_threshold_ = 0.5; + mask_threshold_ = 0.5; + multi_label_ = true; + max_wh_ = 7680.0; + mask_nums_ = 32; +} + +bool YOLOv5SegPostprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = tensors[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + // store mask information + std::vector> mask_embeddings; + (*results)[bs].Clear(); + if (multi_label_) { + (*results)[bs].Reserve(tensors[0].shape[1] * + (tensors[0].shape[2] - mask_nums_ - 5)); + } else { + (*results)[bs].Reserve(tensors[0].shape[1]); + } + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float *data = reinterpret_cast(tensors[0].Data()) + + bs * tensors[0].shape[1] * tensors[0].shape[2]; + for (size_t i = 0; i < tensors[0].shape[1]; ++i) { + int s = i * tensors[0].shape[2]; + float cls_conf = data[s + 4]; + float confidence = data[s + 4]; + std::vector mask_embedding(data + s + tensors[0].shape[2] - + mask_nums_, + data + s + tensors[0].shape[2]); + for (size_t k = 0; k < mask_embedding.size(); ++k) { + mask_embedding[k] *= cls_conf; + } + if (multi_label_) { + for (size_t j = 5; j < tensors[0].shape[2] - mask_nums_; ++j) { + confidence = data[s + 4]; + const float *class_score = data + s + j; + confidence *= (*class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, class_score); + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + // TODO(wangjunjie06): No zero copy + mask_embeddings.push_back(mask_embedding); + } + } else { + const float *max_class_score = std::max_element( + data + s + 5, data + s + tensors[0].shape[2] - mask_nums_); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + mask_embeddings.push_back(mask_embedding); + } + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + // get box index after nms + std::vector index; + utils::NMS(&((*results)[bs]), nms_threshold_, &index); + + // deal with mask + // step1: MatMul, (box_nums * 32) x (32 * 160 * 160) = box_nums * 160 * 160 + // step2: Sigmoid + // step3: Resize to original image size + // step4: Select pixels greater than threshold and crop + (*results)[bs].contain_masks = true; + (*results)[bs].masks.resize((*results)[bs].boxes.size()); + const float *data_mask = + reinterpret_cast(tensors[1].Data()) + + bs * tensors[1].shape[1] * tensors[1].shape[2] * tensors[1].shape[3]; + cv::Mat mask_proto = + cv::Mat(tensors[1].shape[1], tensors[1].shape[2] * tensors[1].shape[3], + CV_32FC(1), const_cast(data_mask)); + // vector to cv::Mat for MatMul + // after push_back, Mat of m*n becomes (m + 1) * n + cv::Mat mask_proposals; + for (size_t i = 0; i < index.size(); ++i) { + mask_proposals.push_back(cv::Mat(mask_embeddings[index[i]]).t()); + } + cv::Mat matmul_result = (mask_proposals * mask_proto).t(); + cv::Mat masks = matmul_result.reshape( + (*results)[bs].boxes.size(), {static_cast(tensors[1].shape[2]), + static_cast(tensors[1].shape[3])}); + // split for boxes nums + std::vector mask_channels; + cv::split(masks, mask_channels); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt 
= ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + // for mask + float pad_h_mask = (float)pad_h / out_h * tensors[1].shape[2]; + float pad_w_mask = (float)pad_w / out_w * tensors[1].shape[3]; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + int32_t label_id = ((*results)[bs].label_ids)[i]; + // clip box + (*results)[bs].boxes[i][0] = + (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = + (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = + (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = + (*results)[bs].boxes[i][3] - max_wh_ * label_id; + (*results)[bs].boxes[i][0] = + std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = + std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = + std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = + std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w); + (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w); + (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h); + // deal with mask + cv::Mat dest, mask; + // sigmoid + cv::exp(-mask_channels[i], dest); + dest = 1.0 / (1.0 + dest); + // crop mask for feature map + int x1 = static_cast(pad_w_mask); + int y1 = static_cast(pad_h_mask); + int x2 = static_cast(tensors[1].shape[3] - pad_w_mask); + int y2 = static_cast(tensors[1].shape[2] - pad_h_mask); + cv::Rect roi(x1, y1, x2 - x1, y2 - y1); + dest = dest(roi); + cv::resize(dest, mask, cv::Size(ipt_w, ipt_h), 0, 0, cv::INTER_LINEAR); + // crop mask for source img + int x1_src = static_cast(round((*results)[bs].boxes[i][0])); + int y1_src = static_cast(round((*results)[bs].boxes[i][1])); + int x2_src = static_cast(round((*results)[bs].boxes[i][2])); + int y2_src = static_cast(round((*results)[bs].boxes[i][3])); + cv::Rect roi_src(x1_src, y1_src, x2_src - x1_src, y2_src - y1_src); + mask = mask(roi_src); + mask = mask > mask_threshold_; + // save mask in DetectionResult + int keep_mask_h = y2_src - y1_src; + int keep_mask_w = x2_src - x1_src; + int keep_mask_numel = keep_mask_h * keep_mask_w; + (*results)[bs].masks[i].Resize(keep_mask_numel); + (*results)[bs].masks[i].shape = {keep_mask_h, keep_mask_w}; + uint8_t *keep_mask_ptr = + reinterpret_cast((*results)[bs].masks[i].Data()); + std::memcpy(keep_mask_ptr, reinterpret_cast(mask.ptr()), + keep_mask_numel * sizeof(uint8_t)); + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h new file mode 100755 index 0000000000..37694d07df --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h @@ -0,0 +1,78 @@ +// Copyright (c) 2022 
PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Postprocessor object for YOLOv5Seg serials model. + */ +class ULTRAINFER_DECL YOLOv5SegPostprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv5Seg serials model + */ + YOLOv5SegPostprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.25 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.25 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.5 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.5 + float GetNMSThreshold() const { return nms_threshold_; } + + /// Set multi_label, set true for eval, default true + void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } + + /// Get multi_label, default true + bool GetMultiLabel() const { return multi_label_; } + +protected: + float conf_threshold_; + float nms_threshold_; + bool multi_label_; + float max_wh_; + // channel nums of masks + int mask_nums_; + // mask threshold + float mask_threshold_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.cc new file mode 100755 index 0000000000..ce820d8037 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.cc @@ -0,0 +1,119 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
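A standalone sketch of the letterbox geometry that YOLOv5SegPreprocessor::LetterBox (defined below) computes; the 810x1080 source size is an assumed example and 640x640 is the default target:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
      int src_w = 810, src_h = 1080;       // assumed source image size
      int target_w = 640, target_h = 640;  // default size_ = {640, 640}
      // Same scale rule as LetterBox: fit the longer side into the target.
      float scale = std::min(target_h * 1.0f / src_h, target_w * 1.0f / src_w);
      int resize_w = static_cast<int>(std::round(src_w * scale));  // 480
      int resize_h = static_cast<int>(std::round(src_h * scale));  // 640
      int pad_w = target_w - resize_w;  // 160 -> 80 px left, 80 px right
      int pad_h = target_h - resize_h;  // 0
      std::printf("scale=%.4f resize=%dx%d pad_w=%d pad_h=%d\n", scale,
                  resize_w, resize_h, pad_w, pad_h);
      return 0;
    }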
+ +#include "ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5SegPreprocessor::YOLOv5SegPreprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = true; + stride_ = 32; + max_wh_ = 7680.0; +} + +void YOLOv5SegPreprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + if (std::fabs(scale - 1.0f) > 1e-06) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv5SegPreprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + // yolov5seg's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool YOLOv5SegPreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h new file mode 100755 index 0000000000..fca6ba0025 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Preprocessor object for YOLOv5Seg serials model. + */ +class ULTRAINFER_DECL YOLOv5SegPreprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv5Seg serials model + */ + YOLOv5SegPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingValue(const std::vector &padding_value) { + padding_value_ = padding_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingValue() const { return padding_value_; } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + + /// Set is_mini_pad, pad to the minimum rectange + /// which height and width is times of stride + void SetMiniPad(bool is_mini_pad) { is_mini_pad_ = is_mini_pad; } + + /// Get is_mini_pad, default false + bool GetMiniPad() const { return is_mini_pad_; } + + /// Set padding stride, only for mini_pad mode + void SetStride(int stride) { stride_ = stride; } + + /// Get padding stride, default 32 + bool GetStride() const { return stride_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + void LetterBox(FDMat *mat); + + // target size, tuple of (width, height), default size = {640, 640} + std::vector size_; + + // padding value, size should be the same as channels + std::vector padding_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git 
a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.cc new file mode 100755 index 0000000000..481ee254ba --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv5Seg::YOLOv5Seg(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv5Seg::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool YOLOv5Seg::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv5Seg::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h new file mode 100755 index 0000000000..f384f095a8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/yolov5seg/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/yolov5seg/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv5Seg model object used when to load a YOLOv5Seg model exported + * by YOLOv5. + */ +class ULTRAINFER_DECL YOLOv5Seg : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov5seg.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv5Seg(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov5seg"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv5Seg + virtual YOLOv5SegPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv5Seg + virtual YOLOv5SegPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + YOLOv5SegPreprocessor preprocessor_; + YOLOv5SegPostprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg_pybind.cc new file mode 100755 index 0000000000..066d27ebe4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov5seg/yolov5seg_pybind.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv5Seg(pybind11::module &m) { + pybind11::class_( + m, "YOLOv5SegPreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::YOLOv5SegPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error("Failed to preprocess the input data " + "in PaddleClasPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::detection::YOLOv5SegPreprocessor::GetSize, + &vision::detection::YOLOv5SegPreprocessor::SetSize) + .def_property("padding_value", + &vision::detection::YOLOv5SegPreprocessor::GetPaddingValue, + &vision::detection::YOLOv5SegPreprocessor::SetPaddingValue) + .def_property("is_scale_up", + &vision::detection::YOLOv5SegPreprocessor::GetScaleUp, + &vision::detection::YOLOv5SegPreprocessor::SetScaleUp) + .def_property("is_mini_pad", + &vision::detection::YOLOv5SegPreprocessor::GetMiniPad, + &vision::detection::YOLOv5SegPreprocessor::SetMiniPad) + .def_property("stride", + &vision::detection::YOLOv5SegPreprocessor::GetStride, + &vision::detection::YOLOv5SegPreprocessor::SetStride); + + pybind11::class_( + m, "YOLOv5SegPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::YOLOv5SegPostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in YOLOv5SegPostprocessor."); + } + return results; + }) + .def("run", + [](vision::detection::YOLOv5SegPostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in YOLOv5SegPostprocessor."); + } + return results; + }) + .def_property( + "conf_threshold", + &vision::detection::YOLOv5SegPostprocessor::GetConfThreshold, + &vision::detection::YOLOv5SegPostprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::detection::YOLOv5SegPostprocessor::GetNMSThreshold, + &vision::detection::YOLOv5SegPostprocessor::SetNMSThreshold) + .def_property("multi_label", + &vision::detection::YOLOv5SegPostprocessor::GetMultiLabel, + &vision::detection::YOLOv5SegPostprocessor::SetMultiLabel); + + pybind11::class_(m, + "YOLOv5Seg") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv5Seg &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::YOLOv5Seg &self, + std::vector &data) { + 
std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::YOLOv5Seg::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::YOLOv5Seg::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.cc new file mode 100755 index 0000000000..5277adecf5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.cc @@ -0,0 +1,342 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov6.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" +#ifdef WITH_GPU +#include "ultrainfer/vision/utils/cuda_utils.h" +#endif // WITH_GPU + +namespace ultrainfer { + +namespace vision { + +namespace detection { + +void YOLOv6::LetterBox(Mat *mat, std::vector size, + std::vector color, bool _auto, bool scale_fill, + bool scale_up, int stride) { + float scale = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(static_cast(mat->Height()) * scale)); + int resize_w = int(round(static_cast(mat->Width()) * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOv6::YOLOv6(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; +#ifdef WITH_GPU + cudaSetDevice(runtime_option.device_id); + cudaStream_t stream; + 
CUDA_CHECK(cudaStreamCreate(&stream)); + cuda_stream_ = reinterpret_cast(stream); + runtime_option.SetExternalStream(cuda_stream_); +#endif // WITH_GPU + initialized = Initialize(); +} + +bool YOLOv6::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + max_wh = 4096.0f; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +YOLOv6::~YOLOv6() { +#ifdef WITH_GPU + if (use_cuda_preprocessing_) { + CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); + CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); + CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); + CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); + } +#endif // WITH_GPU +} + +bool YOLOv6::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // process after image load + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(round(static_cast(mat->Height()) * ratio)); + int resize_w = int(round(static_cast(mat->Width()) * ratio)); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // yolov6's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. HWC->CHW + LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up, + stride); + BGR2RGB::Run(mat); + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +void YOLOv6::UseCudaPreprocessing(int max_image_size) { +#ifdef WITH_GPU + use_cuda_preprocessing_ = true; + is_scale_up = true; + if (input_img_cuda_buffer_host_ == nullptr) { + // prepare input data cache in GPU pinned memory + CUDA_CHECK(cudaMallocHost((void **)&input_img_cuda_buffer_host_, + max_image_size * 3)); + // prepare input data cache in GPU device memory + CUDA_CHECK(cudaMalloc((void **)&input_img_cuda_buffer_device_, + max_image_size * 3)); + CUDA_CHECK(cudaMalloc((void **)&input_tensor_cuda_buffer_device_, + 3 * size[0] * size[1] * sizeof(float))); + } +#else + FDWARNING << "The UltraInfer didn't compile with WITH_GPU=ON." 
<< std::endl; + use_cuda_preprocessing_ = false; +#endif +} + +bool YOLOv6::CudaPreprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { +#ifdef WITH_GPU + if (is_mini_pad != false || is_no_pad != false || is_scale_up != true) { + FDERROR << "Preprocessing with CUDA is only available when the arguments " + "satisfy (is_mini_pad=false, is_no_pad=false, is_scale_up=true)." + << std::endl; + return false; + } + + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + cudaStream_t stream = reinterpret_cast(cuda_stream_); + int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); + memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); + CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, + input_img_cuda_buffer_host_, src_img_buf_size, + cudaMemcpyHostToDevice, stream)); + utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), + mat->Height(), input_tensor_cuda_buffer_device_, + size[0], size[1], padding_value, stream); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(size[0]), + static_cast(size[1])}; + + output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, + input_tensor_cuda_buffer_device_); + output->device = Device::GPU; + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +#else + FDERROR << "CUDA src code was not enabled." << std::endl; + return false; +#endif // WITH_GPU +} + +bool YOLOv6::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + for (size_t i = 0; i < result->boxes.size(); ++i) { + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv6::Predict(cv::Mat *im, DetectionResult *result, float conf_threshold, + float nms_iou_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (use_cuda_preprocessing_) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } else { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." 
<< std::endl; + return false; + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.h new file mode 100755 index 0000000000..1d0762ea7b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6.h @@ -0,0 +1,125 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace detection { +/*! @brief YOLOv6 model object used when to load a YOLOv6 model exported by + * YOLOv6. + */ +class ULTRAINFER_DECL YOLOv6 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov6.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv6(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + ~YOLOv6(); + + std::string ModelName() const { return "YOLOv6"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \param[in] nms_iou_threshold + * iou threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, + float nms_iou_threshold = 0.5); + + void UseCudaPreprocessing(int max_img_size = 3840 * 2160); + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640}; + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + // for offseting the boxes by classes when using NMS, + // default 4096 in meituan/YOLOv6 + float max_wh; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool CudaPreprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + void LetterBox(Mat *mat, std::vector size, std::vector color, + bool _auto, bool scale_fill = false, bool scale_up = true, + int stride = 32); + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // meituan/YOLOv6 official 'export_onnx.py' script will export static ONNX by + // default. + // while is_dynamic_input if 'false', is_mini_pad will force 'false'. This + // value will + // auto check by ultrainfer after the internal Runtime already initialized. + bool is_dynamic_input_; + // CUDA host buffer for input image + uint8_t *input_img_cuda_buffer_host_ = nullptr; + // CUDA device buffer for input image + uint8_t *input_img_cuda_buffer_device_ = nullptr; + // CUDA device buffer for TRT input tensor + float *input_tensor_cuda_buffer_device_ = nullptr; + // Whether to use CUDA preprocessing + bool use_cuda_preprocessing_ = false; + // CUDA stream + void *cuda_stream_ = nullptr; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6_pybind.cc new file mode 100755 index 0000000000..2e629f66dc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov6_pybind.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
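The YOLOv6 postprocessing above multiplies the objectness score by the best per-class score and shifts every decoded box by label_id * max_wh, so that one class-agnostic NMS pass never suppresses boxes belonging to different classes; the offset is subtracted again before the boxes are rescaled. A minimal standalone sketch of that decode-and-offset step (illustrative only; the Box struct and DecodeYoloRows helper are hypothetical, assuming the usual [cx, cy, w, h, obj, class scores...] row layout):

#include <algorithm>
#include <array>
#include <cstddef>
#include <vector>

struct Box { std::array<float, 4> xyxy; int label; float score; };

// Decode one image's raw YOLO head output laid out as rows of
// [cx, cy, w, h, objectness, class_0 ... class_{C-1}].
std::vector<Box> DecodeYoloRows(const float* data, size_t num_rows,
                                size_t row_len, float conf_threshold,
                                float max_wh /* class offset, e.g. 7680 */) {
  std::vector<Box> boxes;
  for (size_t i = 0; i < num_rows; ++i) {
    const float* row = data + i * row_len;
    const float* cls_begin = row + 5;
    const float* cls_end = row + row_len;
    const float* best = std::max_element(cls_begin, cls_end);
    float score = row[4] * (*best);  // objectness * best class probability
    if (score <= conf_threshold) continue;
    int label = static_cast<int>(best - cls_begin);
    float off = label * max_wh;      // per-class shift so one class-agnostic
                                     // NMS pass keeps classes separated
    boxes.push_back({{row[0] - row[2] / 2.0f + off,
                      row[1] - row[3] / 2.0f + off,
                      row[0] + row[2] / 2.0f + off,
                      row[1] + row[3] / 2.0f + off},
                     label, score});
  }
  return boxes;  // run NMS, then subtract label * max_wh from each coordinate
}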
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv6(pybind11::module &m) { + pybind11::class_(m, "YOLOv6") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv6 &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def("use_cuda_preprocessing", + [](vision::detection::YOLOv6 &self, int max_image_size) { + self.UseCudaPreprocessing(max_image_size); + }) + .def_readwrite("size", &vision::detection::YOLOv6::size) + .def_readwrite("padding_value", &vision::detection::YOLOv6::padding_value) + .def_readwrite("is_mini_pad", &vision::detection::YOLOv6::is_mini_pad) + .def_readwrite("is_no_pad", &vision::detection::YOLOv6::is_no_pad) + .def_readwrite("is_scale_up", &vision::detection::YOLOv6::is_scale_up) + .def_readwrite("stride", &vision::detection::YOLOv6::stride) + .def_readwrite("max_wh", &vision::detection::YOLOv6::max_wh); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.cc new file mode 100755 index 0000000000..814dc1969e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.cc @@ -0,0 +1,117 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov7/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv7Postprocessor::YOLOv7Postprocessor() { + conf_threshold_ = 0.25; + nms_threshold_ = 0.5; + max_wh_ = 7680.0; +} + +bool YOLOv7Postprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = tensors[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + (*results)[bs].Reserve(tensors[0].shape[1]); + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float *data = reinterpret_cast(tensors[0].Data()) + + bs * tensors[0].shape[1] * tensors[0].shape[2]; + for (size_t i = 0; i < tensors[0].shape[1]; ++i) { + int s = i * tensors[0].shape[2]; + float confidence = data[s + 4]; + const float *max_class_score = + std::max_element(data + s + 5, data + s + tensors[0].shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + int32_t label_id = ((*results)[bs].label_ids)[i]; + // clip box + (*results)[bs].boxes[i][0] = + (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = + (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = + (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = + (*results)[bs].boxes[i][3] - max_wh_ * label_id; + (*results)[bs].boxes[i][0] = + std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = + std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = + std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = + std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = + std::min((*results)[bs].boxes[i][0], ipt_w - 1.0f); + (*results)[bs].boxes[i][1] = + std::min((*results)[bs].boxes[i][1], ipt_h - 1.0f); + (*results)[bs].boxes[i][2] = + std::min((*results)[bs].boxes[i][2], ipt_w - 1.0f); + (*results)[bs].boxes[i][3] = + std::min((*results)[bs].boxes[i][3], ipt_h - 1.0f); + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.h new file mode 100755 index 0000000000..fc29fb7bf2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/postprocessor.h @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
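The postprocessor above maps boxes from the letterboxed network input back to the original image: it recomputes the resize scale as min(out_h / ipt_h, out_w / ipt_w), derives the symmetric padding, then subtracts the padding, divides by the scale, and clamps to the image bounds. A minimal sketch of that inverse mapping on a single [x1, y1, x2, y2] box (illustrative only; UndoLetterBox is a hypothetical helper):

#include <algorithm>
#include <array>

// Map a box from letterboxed coordinates back to the original image.
// out_* is the preprocessed (letterboxed) size, ipt_* the original size.
inline void UndoLetterBox(std::array<float, 4>* box, float out_h, float out_w,
                          float ipt_h, float ipt_w) {
  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
  float pad_h = (out_h - ipt_h * scale) / 2.0f;
  float pad_w = (out_w - ipt_w * scale) / 2.0f;
  (*box)[0] = std::min(std::max(((*box)[0] - pad_w) / scale, 0.0f), ipt_w - 1.0f);
  (*box)[1] = std::min(std::max(((*box)[1] - pad_h) / scale, 0.0f), ipt_h - 1.0f);
  (*box)[2] = std::min(std::max(((*box)[2] - pad_w) / scale, 0.0f), ipt_w - 1.0f);
  (*box)[3] = std::min(std::max(((*box)[3] - pad_h) / scale, 0.0f), ipt_h - 1.0f);
}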
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Postprocessor object for YOLOv7 serials model. + */ +class ULTRAINFER_DECL YOLOv7Postprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv7 serials model + */ + YOLOv7Postprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.25 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.25 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.5 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.5 + float GetNMSThreshold() const { return nms_threshold_; } + +protected: + float conf_threshold_; + float nms_threshold_; + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.cc new file mode 100755 index 0000000000..e54c5107f6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.cc @@ -0,0 +1,119 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/detection/contrib/yolov7/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv7Preprocessor::YOLOv7Preprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = true; + stride_ = 32; + max_wh_ = 7680.0; +} + +void YOLOv7Preprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + if (std::fabs(scale - 1.0f) > 1e-06) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv7Preprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + // yolov7's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool YOLOv7Preprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.h new file mode 100755 index 0000000000..6ba82f9a31 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/preprocessor.h @@ -0,0 +1,94 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Preprocessor object for YOLOv7 serials model. + */ +class ULTRAINFER_DECL YOLOv7Preprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv7 serials model + */ + YOLOv7Preprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingValue(const std::vector &padding_value) { + padding_value_ = padding_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingValue() const { return padding_value_; } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + void LetterBox(FDMat *mat); + + // target size, tuple of (width, height), default size = {640, 640} + std::vector size_; + + // padding value, size should be the same as channels + std::vector padding_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.cc new file mode 100755 index 0000000000..2646be429a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.cc @@ -0,0 +1,94 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov7/yolov7.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv7::YOLOv7(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv7::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool YOLOv7::Predict(cv::Mat *im, DetectionResult *result, float conf_threshold, + float nms_threshold) { + postprocessor_.SetConfThreshold(conf_threshold); + postprocessor_.SetNMSThreshold(nms_threshold); + if (!Predict(*im, result)) { + return false; + } + return true; +} + +bool YOLOv7::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv7::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.h new file mode 100755 index 0000000000..a38c806e97 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7.h @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
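Predict() above is a thin wrapper over BatchPredict(), which chains the preprocessor, the runtime, and the postprocessor. A minimal C++ usage sketch (illustrative only; the model and image paths are placeholders, and it assumes the FastDeploy-style helpers UseCpu(), Initialized() and DetectionResult::Str() carried over into the ultrainfer headers):

#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"

int main() {
  ultrainfer::RuntimeOption option;
  option.UseCpu();  // picks from valid_cpu_backends; UseGpu(0) for GPU backends
  ultrainfer::vision::detection::YOLOv7 model("yolov7.onnx", "", option);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize the model." << std::endl;
    return -1;
  }

  // Thresholds are adjusted through the bundled postprocessor.
  model.GetPostprocessor().SetConfThreshold(0.3f);
  model.GetPostprocessor().SetNMSThreshold(0.5f);

  cv::Mat image = cv::imread("test.jpg");
  ultrainfer::vision::DetectionResult result;
  if (!model.Predict(image, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << result.Str() << std::endl;
  return 0;
}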
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/yolov7/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/yolov7/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv7 model object used when to load a YOLOv7 model exported by + * YOLOv7. + */ +class ULTRAINFER_DECL YOLOv7 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov7.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv7(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov7"; } + + /** \brief DEPRECATED Predict the detection result for an input image, remove + * at 1.0 version + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \param[in] nms_threshold iou + * threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, float nms_threshold = 0.5); + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv7 + virtual YOLOv7Preprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv7 + virtual YOLOv7Postprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + YOLOv7Preprocessor preprocessor_; + YOLOv7Postprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7_pybind.cc 
b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7_pybind.cc new file mode 100755 index 0000000000..3def25b5fc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7/yolov7_pybind.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv7(pybind11::module &m) { + pybind11::class_(m, + "YOLOv7Preprocessor") + .def(pybind11::init<>()) + .def( + "run", + [](vision::detection::YOLOv7Preprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in YOLOV7Preprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::detection::YOLOv7Preprocessor::GetSize, + &vision::detection::YOLOv7Preprocessor::SetSize) + .def_property("padding_value", + &vision::detection::YOLOv7Preprocessor::GetPaddingValue, + &vision::detection::YOLOv7Preprocessor::SetPaddingValue) + .def_property("is_scale_up", + &vision::detection::YOLOv7Preprocessor::GetScaleUp, + &vision::detection::YOLOv7Preprocessor::SetScaleUp); + + pybind11::class_( + m, "YOLOv7Postprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::YOLOv7Postprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in YOLOv7Postprocessor."); + } + return results; + }) + .def("run", + [](vision::detection::YOLOv7Postprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in YOLOv7Postprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::detection::YOLOv7Postprocessor::GetConfThreshold, + &vision::detection::YOLOv7Postprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::detection::YOLOv7Postprocessor::GetNMSThreshold, + &vision::detection::YOLOv7Postprocessor::SetNMSThreshold); + + pybind11::class_(m, "YOLOv7") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv7 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::YOLOv7 &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + 
images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::YOLOv7::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::YOLOv7::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.cc new file mode 100755 index 0000000000..e2979f9a09 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.cc @@ -0,0 +1,249 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov7end2end_ort.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +void YOLOv7End2EndORT::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOv7End2EndORT::YOLOv7End2EndORT(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT}; // NO TRT + } else { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + if (custom_option.backend == Backend::TRT) { + FDWARNING << "Backend::TRT is not support for YOLOv7End2EndORT, " + << "will fallback to Backend::ORT." 
<< std::endl; + } + initialized = Initialize(); +} + +bool YOLOv7End2EndORT::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +bool YOLOv7End2EndORT::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + YOLOv7End2EndORT::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + BGR2RGB::Run(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool YOLOv7End2EndORT::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold) { + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + // detected success without valid objects. + if (infer_result.shape[0] == 0) { + return true; + } + + result->Clear(); + result->Reserve(infer_result.shape[0]); + // (?,7) (batch_id,x0,y0,x1,y1,cls_id,score) after nms + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[0]; ++i) { + const float *box_cls_ptr = data + (i * 7); + int64_t batch_id = static_cast(box_cls_ptr[0] + 0.5f); // 0,1, ... 
+ FDASSERT(batch_id == 0, + "Only support batch=1 now, but found batch_id != 0."); + float confidence = box_cls_ptr[6]; + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = static_cast(box_cls_ptr[5] + 0.5f); + float x1 = box_cls_ptr[1]; + float y1 = box_cls_ptr[2]; + float x2 = box_cls_ptr[3]; + float y2 = box_cls_ptr[4]; + + result->boxes.emplace_back(std::array{x1, y1, x2, y2}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + + if (result->boxes.size() == 0) { + return true; + } + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv7End2EndORT::Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(reused_output_tensors_[0], result, im_info, + conf_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.h new file mode 100755 index 0000000000..b6352a6333 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort.h @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
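Because the end-to-end ORT export carries NMS inside the ONNX graph, the postprocessing above only has to read rows of seven floats, (batch_id, x0, y0, x1, y1, class_id, score), filter by confidence, and then undo the letterbox as in the other detectors. A standalone sketch of the row decode (illustrative only; Detection is a hypothetical holder type):

#include <cstdint>
#include <vector>

struct Detection {
  float x1, y1, x2, y2;
  int32_t label;
  float score;
};

// Decode a YOLOv7 end-to-end ONNX export whose graph already contains NMS:
// each output row is (batch_id, x0, y0, x1, y1, class_id, score).
std::vector<Detection> DecodeEnd2EndRows(const float* data, size_t num_rows,
                                         float conf_threshold) {
  std::vector<Detection> dets;
  for (size_t i = 0; i < num_rows; ++i) {
    const float* row = data + i * 7;
    if (row[6] <= conf_threshold) continue;  // score
    dets.push_back({row[1], row[2], row[3], row[4],
                    static_cast<int32_t>(row[5] + 0.5f), row[6]});
  }
  return dets;  // coordinates are still in letterboxed space at this point
}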
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv7End2EndORT model object used when to load a YOLOv7End2EndORT + * model exported by YOLOv7. + */ +class ULTRAINFER_DECL YOLOv7End2EndORT : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov7end2end_ort.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv7End2EndORT(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + virtual std::string ModelName() const { return "yolov7end2end_ort"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \return true if the + * prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25); + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold); + + void LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill = false, bool scale_up = true, + int stride = 32); + + bool is_dynamic_input_; +}; +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort_pybind.cc new file mode 100755 index 0000000000..0c82edc2c8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_ort_pybind.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv7End2EndORT(pybind11::module &m) { + pybind11::class_( + m, "YOLOv7End2EndORT") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv7End2EndORT &self, pybind11::array &data, + float conf_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold); + return res; + }) + .def_readwrite("size", &vision::detection::YOLOv7End2EndORT::size) + .def_readwrite("padding_value", + &vision::detection::YOLOv7End2EndORT::padding_value) + .def_readwrite("is_mini_pad", + &vision::detection::YOLOv7End2EndORT::is_mini_pad) + .def_readwrite("is_no_pad", + &vision::detection::YOLOv7End2EndORT::is_no_pad) + .def_readwrite("is_scale_up", + &vision::detection::YOLOv7End2EndORT::is_scale_up) + .def_readwrite("stride", &vision::detection::YOLOv7End2EndORT::stride); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.cc new file mode 100755 index 0000000000..80b8e2787f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.cc @@ -0,0 +1,357 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
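Initialize() decides whether the exported graph has a dynamic spatial shape by inspecting the first runtime input: any non-positive height or width dimension marks it as dynamic, and for static graphs is_mini_pad is forced off so the letterboxed tensor always matches the fixed input size. A standalone sketch of that check (illustrative only; HasDynamicSpatialDims is a hypothetical helper):

#include <cstdint>
#include <vector>

// Returns true if an NCHW input shape has a dynamic (non-positive) height or
// width dimension, e.g. {1, 3, -1, -1} from an export with dynamic axes.
bool HasDynamicSpatialDims(const std::vector<int64_t>& shape) {
  for (size_t i = 2; i < shape.size(); ++i) {
    if (shape[i] <= 0) return true;
  }
  return false;
}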
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov7end2end_trt.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" +#ifdef WITH_GPU +#include "ultrainfer/vision/utils/cuda_utils.h" +#endif // WITH_GPU + +namespace ultrainfer { +namespace vision { +namespace detection { + +void YOLOv7End2EndTRT::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOv7End2EndTRT::YOLOv7End2EndTRT(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {}; // NO CPU + valid_gpu_backends = {Backend::TRT}; // NO ORT + } else { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + if (runtime_option.device != Device::GPU) { + FDWARNING << runtime_option.device + << " is not support for YOLOv7End2EndTRT," + << "will fallback to Device::GPU." << std::endl; + runtime_option.device = Device::GPU; + } + if (runtime_option.backend != Backend::UNKNOWN) { + if (runtime_option.backend != Backend::TRT) { + FDWARNING << runtime_option.backend + << " is not support for YOLOv7End2EndTRT," + << "will fallback to Backend::TRT." 
<< std::endl; + runtime_option.backend = Backend::TRT; + } + } +#ifdef WITH_GPU + cudaSetDevice(runtime_option.device_id); + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + cuda_stream_ = reinterpret_cast(stream); + runtime_option.SetExternalStream(cuda_stream_); +#endif // WITH_GPU + initialized = Initialize(); +} + +bool YOLOv7End2EndTRT::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +YOLOv7End2EndTRT::~YOLOv7End2EndTRT() { +#ifdef WITH_GPU + if (use_cuda_preprocessing_) { + CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); + CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); + CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); + CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); + } +#endif // WITH_GPU +} + +bool YOLOv7End2EndTRT::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + YOLOv7End2EndTRT::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + BGR2RGB::Run(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +void YOLOv7End2EndTRT::UseCudaPreprocessing(int max_image_size) { +#ifdef WITH_GPU + use_cuda_preprocessing_ = true; + is_scale_up = true; + if (input_img_cuda_buffer_host_ == nullptr) { + // prepare input data cache in GPU pinned memory + CUDA_CHECK(cudaMallocHost((void **)&input_img_cuda_buffer_host_, + max_image_size * 3)); + // prepare input data cache in GPU device memory + CUDA_CHECK(cudaMalloc((void **)&input_img_cuda_buffer_device_, + max_image_size * 3)); + CUDA_CHECK(cudaMalloc((void **)&input_tensor_cuda_buffer_device_, + 3 * size[0] * size[1] * sizeof(float))); + } +#else + FDWARNING << "The UltraInfer didn't compile with WITH_GPU=ON." 
<< std::endl; + use_cuda_preprocessing_ = false; +#endif +} + +bool YOLOv7End2EndTRT::CudaPreprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { +#ifdef WITH_GPU + if (is_mini_pad != false || is_no_pad != false || is_scale_up != true) { + FDERROR << "Preprocessing with CUDA is only available when the arguments " + "satisfy (is_mini_pad=false, is_no_pad=false, is_scale_up=true)." + << std::endl; + return false; + } + + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + cudaStream_t stream = reinterpret_cast(cuda_stream_); + int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); + memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); + CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, + input_img_cuda_buffer_host_, src_img_buf_size, + cudaMemcpyHostToDevice, stream)); + utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), + mat->Height(), input_tensor_cuda_buffer_device_, + size[0], size[1], padding_value, stream); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(size[0]), + static_cast(size[1])}; + + output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, + input_tensor_cuda_buffer_device_); + output->device = Device::GPU; + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +#else + FDERROR << "CUDA src code was not enabled." << std::endl; + return false; +#endif // WITH_GPU +} + +bool YOLOv7End2EndTRT::Postprocess( + std::vector &infer_results, DetectionResult *result, + const std::map> &im_info, + float conf_threshold) { + FDASSERT(infer_results.size() == 4, "Output tensor size must be 4."); + FDTensor &num_tensor = infer_results.at(0); // INT32 + FDTensor &boxes_tensor = infer_results.at(1); // FLOAT + FDTensor &scores_tensor = infer_results.at(2); // FLOAT + FDTensor &classes_tensor = infer_results.at(3); // INT32 + FDASSERT(num_tensor.dtype == FDDataType::INT32, + "The dtype of num_dets must be INT32."); + FDASSERT(boxes_tensor.dtype == FDDataType::FP32, + "The dtype of det_boxes_tensor must be FP32."); + FDASSERT(scores_tensor.dtype == FDDataType::FP32, + "The dtype of det_scores_tensor must be FP32."); + FDASSERT(classes_tensor.dtype == FDDataType::INT32, + "The dtype of det_classes_tensor must be INT32."); + FDASSERT(num_tensor.shape[0] == 1, "Only support batch=1 now."); + // post-process for end2end yolov7 after trt nms. 
+ float *boxes_data = static_cast(boxes_tensor.Data()); // (1,100,4) + float *scores_data = static_cast(scores_tensor.Data()); // (1,100) + int32_t *classes_data = + static_cast(classes_tensor.Data()); // (1,100) + int32_t num_dets_after_trt_nms = static_cast(num_tensor.Data())[0]; + if (num_dets_after_trt_nms == 0) { + return true; + } + result->Clear(); + result->Reserve(num_dets_after_trt_nms); + for (size_t i = 0; i < num_dets_after_trt_nms; ++i) { + float confidence = scores_data[i]; + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = classes_data[i]; + float x1 = boxes_data[(i * 4) + 0]; + float y1 = boxes_data[(i * 4) + 1]; + float x2 = boxes_data[(i * 4) + 2]; + float y2 = boxes_data[(i * 4) + 3]; + + result->boxes.emplace_back(std::array{x1, y1, x2, y2}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + + if (result->boxes.size() == 0) { + return true; + } + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv7End2EndTRT::Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (use_cuda_preprocessing_) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } else { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(reused_output_tensors_, result, im_info, conf_threshold)) { + FDERROR << "Failed to post process." 
<< std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.h new file mode 100755 index 0000000000..512b4d0e3f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt.h @@ -0,0 +1,110 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv7End2EndTRT model object used when to load a YOLOv7End2EndTRT + * model exported by YOLOv7. + */ +class ULTRAINFER_DECL YOLOv7End2EndTRT : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov7end2end_trt.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv7End2EndTRT(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + ~YOLOv7End2EndTRT(); + + virtual std::string ModelName() const { return "yolov7end2end_trt"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \return true if the + * prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25); + + void UseCudaPreprocessing(int max_img_size = 3840 * 2160); + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool CudaPreprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(std::vector &infer_results, + DetectionResult *result, + const std::map> &im_info, + float conf_threshold); + + void LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill = false, bool scale_up = true, + int stride = 32); + + bool is_dynamic_input_; + // CUDA host buffer for input image + uint8_t *input_img_cuda_buffer_host_ = nullptr; + // CUDA device buffer for input image + uint8_t *input_img_cuda_buffer_device_ = nullptr; + // CUDA device buffer for TRT input tensor + float *input_tensor_cuda_buffer_device_ = nullptr; + // Whether to use CUDA preprocessing + bool use_cuda_preprocessing_ = false; + // CUDA stream + void *cuda_stream_ = nullptr; +}; +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt_pybind.cc new file mode 100755 index 0000000000..c677a3ecc5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov7end2end_trt_pybind.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
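// ---------------------------------------------------------------------------
// Editorial note: the sketch below is illustrative and not part of the patch.
// It shows how the YOLOv7End2EndTRT class declared in yolov7end2end_trt.h
// above could be driven from C++, using only the APIs visible in this diff
// (constructor, Predict, DetectionResult). The model/image file names and the
// use of a default RuntimeOption are assumptions; adapt them to your build
// and deployment, in particular the backend configuration for TensorRT.
#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/detection/contrib/yolov7end2end_trt.h"

int main() {
  namespace det = ultrainfer::vision::detection;
  // Default RuntimeOption; per the class docs above, the backend is chosen
  // from the model's valid backends unless configured otherwise.
  ultrainfer::RuntimeOption option;
  det::YOLOv7End2EndTRT model("yolov7end2end_trt.onnx", "", option);

  cv::Mat im = cv::imread("test.jpg");  // 3-D HWC, BGR, as documented above
  ultrainfer::vision::DetectionResult result;
  if (!model.Predict(&im, &result, /*conf_threshold=*/0.25f)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << "Detected " << result.boxes.size() << " boxes." << std::endl;
  return 0;
}
// ---------------------------------------------------------------------------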
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv7End2EndTRT(pybind11::module &m) { + pybind11::class_( + m, "YOLOv7End2EndTRT") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv7End2EndTRT &self, pybind11::array &data, + float conf_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold); + return res; + }) + .def("use_cuda_preprocessing", + [](vision::detection::YOLOv7End2EndTRT &self, int max_image_size) { + self.UseCudaPreprocessing(max_image_size); + }) + .def_readwrite("size", &vision::detection::YOLOv7End2EndTRT::size) + .def_readwrite("padding_value", + &vision::detection::YOLOv7End2EndTRT::padding_value) + .def_readwrite("is_mini_pad", + &vision::detection::YOLOv7End2EndTRT::is_mini_pad) + .def_readwrite("is_no_pad", + &vision::detection::YOLOv7End2EndTRT::is_no_pad) + .def_readwrite("is_scale_up", + &vision::detection::YOLOv7End2EndTRT::is_scale_up) + .def_readwrite("stride", &vision::detection::YOLOv7End2EndTRT::stride); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.cc new file mode 100755 index 0000000000..695d585285 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.cc @@ -0,0 +1,143 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov8/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv8Postprocessor::YOLOv8Postprocessor() { + conf_threshold_ = 0.25; + nms_threshold_ = 0.5; + multi_label_ = true; + max_wh_ = 7680.0; +} + +bool YOLOv8Postprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector>> &ims_info) { + int batch = tensors[0].shape[0]; + // transpose + std::vector dim{0, 2, 1}; + FDTensor tensor_transpose; + function::Transpose(tensors[0], &tensor_transpose, dim); + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + if (multi_label_) { + (*results)[bs].Reserve(tensor_transpose.shape[1] * + (tensor_transpose.shape[2] - 4)); + } else { + (*results)[bs].Reserve(tensor_transpose.shape[1]); + } + if (tensor_transpose.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float *data = + reinterpret_cast(tensor_transpose.Data()) + + bs * tensor_transpose.shape[1] * tensor_transpose.shape[2]; + for (size_t i = 0; i < tensor_transpose.shape[1]; ++i) { + int s = i * tensor_transpose.shape[2]; + if (multi_label_) { + for (size_t j = 4; j < tensor_transpose.shape[2]; ++j) { + float confidence = data[s + j]; + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = j - 4; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + } else { + const float *max_class_score = std::max_element( + data + s + 4, data + s + tensor_transpose.shape[2]); + float confidence = *max_class_score; + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 4, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + int32_t label_id = ((*results)[bs].label_ids)[i]; + // clip box + (*results)[bs].boxes[i][0] = + (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = + (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = + (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = + (*results)[bs].boxes[i][3] - max_wh_ * label_id; + (*results)[bs].boxes[i][0] = + std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = + std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = + std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = + std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w); + (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w); + (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h); + } + } + return true; +} + +} // namespace 
detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.h new file mode 100755 index 0000000000..c78eefb688 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/postprocessor.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Postprocessor object for YOLOv8 serials model. + */ +class ULTRAINFER_DECL YOLOv8Postprocessor { +public: + /** \brief Create a postprocessor instance for YOLOv8 serials model + */ + YOLOv8Postprocessor(); + + /** \brief Process the result of runtime and fill to DetectionResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.25 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.25 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.5 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.5 + float GetNMSThreshold() const { return nms_threshold_; } + + /// Set multi_label, set true for eval, default true + void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } + + /// Get multi_label, default true + bool GetMultiLabel() const { return multi_label_; } + +protected: + float conf_threshold_; + float nms_threshold_; + bool multi_label_; + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.cc new file mode 100755 index 0000000000..606549da45 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.cc @@ -0,0 +1,119 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov8/preprocessor.h" +#include "ultrainfer/function/concat.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv8Preprocessor::YOLOv8Preprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = true; + stride_ = 32; + max_wh_ = 7680.0; +} + +void YOLOv8Preprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + if (std::fabs(scale - 1.0f) > 1e-06) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv8Preprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + // yolov8's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool YOLOv8Preprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.h new file mode 100755 index 0000000000..08185b1a2d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/preprocessor.h @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace detection { +/*! @brief Preprocessor object for YOLOv8 serials model. + */ +class ULTRAINFER_DECL YOLOv8Preprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv8 serials model + */ + YOLOv8Preprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingValue(const std::vector &padding_value) { + padding_value_ = padding_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingValue() const { return padding_value_; } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + + /// Set is_mini_pad, pad to the minimum rectange + /// which height and width is times of stride + void SetMiniPad(bool is_mini_pad) { is_mini_pad_ = is_mini_pad; } + + /// Get is_mini_pad, default false + bool GetMiniPad() const { return is_mini_pad_; } + + /// Set padding stride, only for mini_pad mode + void SetStride(int stride) { stride_ = stride; } + + /// Get padding stride, default 32 + bool GetStride() const { return stride_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + void LetterBox(FDMat *mat); + + // target size, tuple 
of (width, height), default size = {640, 640} + std::vector size_; + + // padding value, size should be the same as channels + std::vector padding_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.cc new file mode 100755 index 0000000000..2c66eb31ab --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolov8/yolov8.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +YOLOv8::YOLOv8(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv8::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool YOLOv8::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv8::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector>> ims_info; + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." 
+ << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.h new file mode 100755 index 0000000000..cf96376feb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/detection/contrib/yolov8/postprocessor.h" +#include "ultrainfer/vision/detection/contrib/yolov8/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace detection { +/*! @brief YOLOv8 model object used when to load a YOLOv8 model exported by + * YOLOv8. + */ +class ULTRAINFER_DECL YOLOv8 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov8.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv8(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov8"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, DetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of YOLOv8 + virtual YOLOv8Preprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of YOLOv8 + virtual YOLOv8Postprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + YOLOv8Preprocessor preprocessor_; + YOLOv8Postprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8_pybind.cc 
b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8_pybind.cc new file mode 100755 index 0000000000..929e9dbf54 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolov8/yolov8_pybind.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv8(pybind11::module &m) { + pybind11::class_(m, + "YOLOv8Preprocessor") + .def(pybind11::init<>()) + .def( + "run", + [](vision::detection::YOLOv8Preprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in YOLOv8Preprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::detection::YOLOv8Preprocessor::GetSize, + &vision::detection::YOLOv8Preprocessor::SetSize) + .def_property("padding_value", + &vision::detection::YOLOv8Preprocessor::GetPaddingValue, + &vision::detection::YOLOv8Preprocessor::SetPaddingValue) + .def_property("is_scale_up", + &vision::detection::YOLOv8Preprocessor::GetScaleUp, + &vision::detection::YOLOv8Preprocessor::SetScaleUp) + .def_property("is_mini_pad", + &vision::detection::YOLOv8Preprocessor::GetMiniPad, + &vision::detection::YOLOv8Preprocessor::SetMiniPad) + .def_property("stride", &vision::detection::YOLOv8Preprocessor::GetStride, + &vision::detection::YOLOv8Preprocessor::SetStride); + + pybind11::class_( + m, "YOLOv8Postprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::detection::YOLOv8Postprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "YOLOv8Postprocessor."); + } + return results; + }) + .def("run", + [](vision::detection::YOLOv8Postprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "YOLOv8Postprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::detection::YOLOv8Postprocessor::GetConfThreshold, + &vision::detection::YOLOv8Postprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::detection::YOLOv8Postprocessor::GetNMSThreshold, + &vision::detection::YOLOv8Postprocessor::SetNMSThreshold) + .def_property("multi_label", + &vision::detection::YOLOv8Postprocessor::GetMultiLabel, + 
&vision::detection::YOLOv8Postprocessor::SetMultiLabel); + + pybind11::class_(m, "YOLOv8") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv8 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::YOLOv8 &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::detection::YOLOv8::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::YOLOv8::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.cc new file mode 100755 index 0000000000..0ce66c0f9d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.cc @@ -0,0 +1,322 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/contrib/yolox.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace detection { + +struct YOLOXAnchor { + int grid0; + int grid1; + int stride; +}; + +void GenerateYOLOXAnchors(const std::vector &size, + const std::vector &downsample_strides, + std::vector *anchors) { + // size: tuple of input (width, height) + // downsample_strides: downsample strides in YOLOX, e.g (8,16,32) + const int width = size[0]; + const int height = size[1]; + for (const auto &ds : downsample_strides) { + int num_grid_w = width / ds; + int num_grid_h = height / ds; + for (int g1 = 0; g1 < num_grid_h; ++g1) { + for (int g0 = 0; g0 < num_grid_w; ++g0) { + (*anchors).emplace_back(YOLOXAnchor{g0, g1, ds}); + } + } + } +} + +void LetterBoxWithRightBottomPad(Mat *mat, std::vector size, + std::vector color) { + // specific pre process for YOLOX, not the same as YOLOv5 + // reference: YOLOX/yolox/data/data_augment.py#L142 + float r = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + + int resize_h = int(round(static_cast(mat->Height()) * r)); + int resize_w = int(round(static_cast(mat->Width()) * r)); + + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + // right-bottom padding for YOLOX + if (pad_h > 0 || pad_w > 0) { + int top = 0; + int left = 0; + int right = pad_w; + int bottom = pad_h; + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOX::YOLOX(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == 
ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOX::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + downsample_strides = {8, 16, 32}; + max_wh = 4096.0f; + is_decode_exported = false; + reused_input_tensors_.resize(1); + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + return true; +} + +bool YOLOX::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // YOLOX ( >= v0.1.1) preprocess steps + // 1. preproc + // 2. HWC->CHW + // 3. NO!!! BRG2GRB and Normalize needed in YOLOX + LetterBoxWithRightBottomPad(mat, size, padding_value); + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool YOLOX::Postprocess( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float r = std::min(out_h / ipt_h, out_w / ipt_w); + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max(result->boxes[i][0] / r, 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1] / r, 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2] / r, 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3] / r, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOX::PostprocessWithDecode( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + result->Clear(); + result->Reserve(infer_result.shape[1]); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + // generate anchors with dowmsample strides + std::vector anchors; + GenerateYOLOXAnchors(size, downsample_strides, &anchors); + + // infer_result shape might look like (1,n,85=5+80) + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + int s = i * infer_result.shape[2]; + float confidence = data[s + 4]; + float *max_class_score = + std::max_element(data + s + 5, data + s + infer_result.shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // fetch i-th anchor + float grid0 = static_cast(anchors.at(i).grid0); + float grid1 = static_cast(anchors.at(i).grid1); + float downsample_stride = static_cast(anchors.at(i).stride); + // convert from offsets to [x, y, w, h] + float dx = data[s]; + float dy = data[s + 1]; + float dw = data[s + 2]; + float dh = data[s + 3]; + + float x = (dx + grid0) * downsample_stride; + float y = (dy + grid1) * downsample_stride; + float w = std::exp(dw) * downsample_stride; + float h = std::exp(dh) * downsample_stride; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + x - w / 2.0f + label_id * max_wh, y - h / 2.0f + label_id * max_wh, + x + w / 2.0f + label_id * max_wh, y + h / 2.0f + label_id * max_wh}); + // label_id * max_wh for multi classes NMS + result->label_ids.push_back(label_id); + result->scores.push_back(confidence); + } + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float r = std::min(out_h / ipt_h, out_w / ipt_w); + for (size_t i = 0; i < result->boxes.size(); ++i) { + int32_t label_id = (result->label_ids)[i]; + // clip box + result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; + result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; + result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; + result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; + result->boxes[i][0] = std::max(result->boxes[i][0] / r, 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1] / r, 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2] / r, 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3] / r, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool YOLOX::Predict(cv::Mat *im, DetectionResult *result, float conf_threshold, + float nms_iou_threshold) { + Mat mat(*im); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer()) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (is_decode_exported) { + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + } else { + if (!PostprocessWithDecode(reused_output_tensors_[0], result, im_info, + conf_threshold, nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.h b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.h new file mode 100755 index 0000000000..b314a58b56 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox.h @@ -0,0 +1,106 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace detection { +/*! @brief YOLOX model object used when to load a YOLOX model exported by YOLOX. + */ +class ULTRAINFER_DECL YOLOX : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolox.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOX(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "YOLOX"; } + /** \brief Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * will be writen to this structure \param[in] conf_threshold confidence + * threashold for postprocessing, default is 0.25 \param[in] nms_iou_threshold + * iou threashold for NMS, default is 0.5 \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result, + float conf_threshold = 0.25, + float nms_iou_threshold = 0.5); + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + std::vector padding_value; + /*! @brief + whether the model_file was exported with decode module. The official + YOLOX/tools/export_onnx.py script will export ONNX file without + decode module. Please set it 'true' manually if the model file + was exported with decode module. default false. + */ + bool is_decode_exported; + // downsample strides for YOLOX to generate anchors, + // will take (8,16,32) as default values, might have stride=64 + std::vector downsample_strides; + // for offseting the boxes by classes when using NMS, default 4096 + float max_wh; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool PostprocessWithDecode( + FDTensor &infer_result, DetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // megvii/YOLOX official 'export_onnx.py' script will export static ONNX by + // default. + // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This + // value will + // auto check by ultrainfer after the internal Runtime already initialized. + bool is_dynamic_input_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox_pybind.cc new file mode 100755 index 0000000000..38f7efce1a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/contrib/yolox_pybind.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
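// ---------------------------------------------------------------------------
// Editorial note: illustrative sketch, not part of the patch. It exercises
// the YOLOX wrapper declared in yolox.h above, including the
// is_decode_exported flag documented in its class comment. File names are
// placeholders and the default RuntimeOption (and ONNX model format) is
// assumed.
#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/detection/contrib/yolox.h"

int main() {
  namespace det = ultrainfer::vision::detection;
  det::YOLOX model("yolox_s.onnx");  // params_file empty, ONNX format by default

  // The official YOLOX export script omits the decode module by default, so
  // is_decode_exported stays false and the decode-aware postprocess path is
  // taken internally; set it to true only for models exported with decoding.
  model.is_decode_exported = false;

  cv::Mat im = cv::imread("test.jpg");
  ultrainfer::vision::DetectionResult result;
  if (!model.Predict(&im, &result, /*conf_threshold=*/0.25f,
                     /*nms_iou_threshold=*/0.5f)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << "Detected " << result.scores.size() << " objects." << std::endl;
  return 0;
}
// ---------------------------------------------------------------------------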
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOX(pybind11::module &m) { + pybind11::class_(m, "YOLOX") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOX &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def_readwrite("size", &vision::detection::YOLOX::size) + .def_readwrite("padding_value", &vision::detection::YOLOX::padding_value) + .def_readwrite("is_decode_exported", + &vision::detection::YOLOX::is_decode_exported) + .def_readwrite("downsample_strides", + &vision::detection::YOLOX::downsample_strides) + .def_readwrite("max_wh", &vision::detection::YOLOX::max_wh); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/detection_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/detection_pybind.cc new file mode 100755 index 0000000000..4b357406ab --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/detection_pybind.cc @@ -0,0 +1,54 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindYOLOv7(pybind11::module &m); +void BindScaledYOLOv4(pybind11::module &m); +void BindYOLOR(pybind11::module &m); +void BindYOLOv6(pybind11::module &m); +void BindYOLOv5Lite(pybind11::module &m); +void BindYOLOv5(pybind11::module &m); +void BindYOLOv5Seg(pybind11::module &m); +void BindFastestDet(pybind11::module &m); +void BindYOLOX(pybind11::module &m); +void BindNanoDetPlus(pybind11::module &m); +void BindPPDet(pybind11::module &m); +void BindYOLOv7End2EndTRT(pybind11::module &m); +void BindYOLOv7End2EndORT(pybind11::module &m); +void BindYOLOv8(pybind11::module &m); +void BindRKYOLO(pybind11::module &m); + +void BindDetection(pybind11::module &m) { + auto detection_module = + m.def_submodule("detection", "Image object detection models."); + BindPPDet(detection_module); + BindYOLOv7(detection_module); + BindScaledYOLOv4(detection_module); + BindYOLOR(detection_module); + BindYOLOv6(detection_module); + BindYOLOv5Lite(detection_module); + BindYOLOv5(detection_module); + BindYOLOv5Seg(detection_module); + BindFastestDet(detection_module); + BindYOLOX(detection_module); + BindNanoDetPlus(detection_module); + BindYOLOv7End2EndTRT(detection_module); + BindYOLOv7End2EndORT(detection_module); + BindYOLOv8(detection_module); + BindRKYOLO(detection_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.cc new file mode 100755 index 0000000000..56564411f2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.cc @@ -0,0 +1,108 @@ +#include "ultrainfer/vision/detection/ppdet/base.h" + +#include "ultrainfer/utils/unique_ptr.h" +#include 
"ultrainfer/vision/utils/utils.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +PPDetBase::PPDetBase(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file), postprocessor_(preprocessor_.GetArch()) { + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; +} + +std::unique_ptr PPDetBase::Clone() const { + std::unique_ptr clone_model = + ultrainfer::utils::make_unique(PPDetBase(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool PPDetBase::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool PPDetBase::Predict(cv::Mat *im, DetectionResult *result) { + return Predict(*im, result); +} + +bool PPDetBase::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool PPDetBase::BatchPredict(const std::vector &imgs, + std::vector *results) { + std::vector fd_images = WrapMat(imgs); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + reused_input_tensors_[0].name = "image"; + reused_input_tensors_[1].name = "scale_factor"; + reused_input_tensors_[2].name = "im_shape"; + + if (NumInputsOfRuntime() == 1) { + auto scale_factor = static_cast(reused_input_tensors_[1].Data()); + postprocessor_.SetScaleFactor({scale_factor[0], scale_factor[1]}); + } + + // Some models don't need scale_factor and im_shape as input + while (reused_input_tensors_.size() != NumInputsOfRuntime()) { + reused_input_tensors_.pop_back(); + } + + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +bool PPDetBase::CheckArch() { + // Add "PicoDet" arch for backward compability with the + // old ppdet model, such as picodet from PaddleClas + // PP-ShiTuV2 pipeline. + std::vector archs = { + "SOLOv2", "YOLO", "SSD", "RetinaNet", "RCNN", "Face", + "GFL", "YOLOX", "YOLOv5", "YOLOv6", "YOLOv7", "RTMDet", + "FCOS", "TTFNet", "TOOD", "DETR", "PicoDet"}; + auto arch_ = preprocessor_.GetArch(); + for (auto item : archs) { + if (arch_ == item) { + return true; + } + } + FDWARNING << "Please set model arch," + << "support value : SOLOv2, YOLO, SSD, RetinaNet, " + << "RCNN, Face , GFL , RTMDet ," + << "FCOS , TTFNet , TOOD , DETR, PicoDet" << std::endl; + return false; +} + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.h new file mode 100755 index 0000000000..57b0a210a3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/base.h @@ -0,0 +1,100 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/detection/ppdet/postprocessor.h" +#include "ultrainfer/vision/detection/ppdet/preprocessor.h" + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +/** \brief All object detection model APIs are defined inside this namespace + * + */ +namespace detection { + +/*! @brief Base model object used when to load a model exported by + * PaddleDetection + */ +class ULTRAINFER_DECL PPDetBase : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g ppyoloe/model.pdmodel + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * ppyoloe/infer_cfg.yml \param[in] custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PPDetBase(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new PaddleDetModel with less memory usage when multiple + * instances of the same model are created + * + * \return new PaddleDetModel* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + virtual std::string ModelName() const { return "PaddleDetection/BaseModel"; } + + /** \brief DEPRECATED Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result); + + /** \brief Predict the detection result for an input image + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, DetectionResult *result); + + /** \brief Predict the detection result for an input image list + * \param[in] im The input image list, all the elements come from + * cv::imread(), is a 3-D array with layout HWC, BGR format \param[in] results + * The output detection result list \return true if the prediction successed, + * otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + 
PaddleDetPreprocessor &GetPreprocessor() { return preprocessor_; } + + PaddleDetPostprocessor &GetPostprocessor() { return postprocessor_; } + virtual bool CheckArch(); + +protected: + virtual bool Initialize(); + PaddleDetPreprocessor preprocessor_; + PaddleDetPostprocessor postprocessor_; +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/model.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/model.h new file mode 100755 index 0000000000..09c2001e9c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/model.h @@ -0,0 +1,508 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/detection/ppdet/base.h" +#include "ultrainfer/vision/detection/ppdet/multiclass_nms.h" +#include "ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +class ULTRAINFER_DECL PicoDet : public PPDetBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g picodet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g picodet/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * picodet/infer_cfg.yml \param[in] custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PicoDet(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_timvx_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PicoDet"; } +}; + +class ULTRAINFER_DECL SOLOv2 : public PPDetBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g picodet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g picodet/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * picodet/infer_cfg.yml \param[in] 
custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + SOLOv2(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER, Backend::TRT}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "SOLOv2"; } +}; + +class ULTRAINFER_DECL PPYOLOE : public PPDetBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g ppyoloe/model.pdmodel + * \param[in] params_file Path of parameter file, e.g picodet/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * picodet/infer_cfg.yml \param[in] custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PPYOLOE(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE, Backend::TVM}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_horizon_backends = {Backend::HORIZONNPU}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PPYOLOE"; } +}; + +class ULTRAINFER_DECL PPYOLO : public PPDetBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g ppyolo/model.pdmodel + * \param[in] params_file Path of parameter file, e.g ppyolo/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g + * picodet/infer_cfg.yml \param[in] custom_option RuntimeOption for inference, + * the default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PPYOLO(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual 
std::string ModelName() const { return "PaddleDetection/PP-YOLO"; } +}; + +class ULTRAINFER_DECL YOLOv3 : public PPDetBase { +public: + YOLOv3(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOv3"; } +}; + +class ULTRAINFER_DECL PaddleYOLOX : public PPDetBase { +public: + PaddleYOLOX(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOX"; } +}; + +class ULTRAINFER_DECL FasterRCNN : public PPDetBase { +public: + FasterRCNN(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/FasterRCNN"; } +}; + +class ULTRAINFER_DECL MaskRCNN : public PPDetBase { +public: + MaskRCNN(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/MaskRCNN"; } +}; + +class ULTRAINFER_DECL SSD : public PPDetBase { +public: + SSD(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/SSD"; } +}; + +class ULTRAINFER_DECL PaddleYOLOv5 : public PPDetBase { +public: + 
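+  // The model wrappers in this header differ mainly in the backend
+  // whitelists they set and in ModelName(); a custom RuntimeOption can be
+  // passed to select a device/backend. Sketch (paths are placeholders and
+  // UseGpu() is assumed from the runtime API):
+  //
+  //   ultrainfer::RuntimeOption opt;
+  //   opt.UseGpu(0);
+  //   PaddleYOLOv5 det("yolov5/model.pdmodel", "yolov5/model.pdiparams",
+  //                    "yolov5/infer_cfg.yml", opt);
+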
PaddleYOLOv5(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOv5"; } +}; + +class ULTRAINFER_DECL PaddleYOLOv6 : public PPDetBase { +public: + PaddleYOLOv6(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOv6"; } +}; + +class ULTRAINFER_DECL PaddleYOLOv7 : public PPDetBase { +public: + PaddleYOLOv7(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOv7"; } +}; + +class ULTRAINFER_DECL PaddleYOLOv8 : public PPDetBase { +public: + PaddleYOLOv8(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/YOLOv8"; } +}; + +class ULTRAINFER_DECL RTMDet : public PPDetBase { +public: + RTMDet(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/RTMDet"; } +}; + +class ULTRAINFER_DECL CascadeRCNN : public PPDetBase { +public: + CascadeRCNN(const std::string 
&model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { + return "PaddleDetection/CascadeRCNN"; + } +}; + +class ULTRAINFER_DECL PSSDet : public PPDetBase { +public: + PSSDet(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/PSSDet"; } +}; + +class ULTRAINFER_DECL RetinaNet : public PPDetBase { +public: + RetinaNet(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/RetinaNet"; } +}; + +class ULTRAINFER_DECL PPYOLOESOD : public PPDetBase { +public: + PPYOLOESOD(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/PPYOLOESOD"; } +}; + +class ULTRAINFER_DECL FCOS : public PPDetBase { +public: + FCOS(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/FCOS"; } +}; + +class ULTRAINFER_DECL TTFNet : public PPDetBase { +public: + TTFNet(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/TTFNet"; } +}; + +class ULTRAINFER_DECL TOOD : public PPDetBase { +public: + TOOD(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const 
RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/TOOD"; } +}; + +class ULTRAINFER_DECL GFL : public PPDetBase { +public: + GFL(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/GFL"; } +}; + +class ULTRAINFER_DECL PaddleDetectionModel : public PPDetBase { +public: + PaddleDetectionModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + CheckArch(); + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetectionModel"; } +}; + +class ULTRAINFER_DECL PPYOLOER : public PPDetBase { +public: + PPYOLOER(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER, Backend::OPENVINO, Backend::ORT, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PPYOLOER"; } +}; + +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.cc new file mode 100755 index 0000000000..932049b7f5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.cc @@ -0,0 +1,227 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/detection/ppdet/multiclass_nms.h" +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include + +namespace ultrainfer { +namespace vision { +namespace detection { +template +bool SortScorePairDescend(const std::pair &pair1, + const std::pair &pair2) { + return pair1.first > pair2.first; +} + +void GetMaxScoreIndex(const float *scores, const int &score_size, + const float &threshold, const int &top_k, + std::vector> *sorted_indices) { + for (size_t i = 0; i < score_size; ++i) { + if (scores[i] > threshold) { + sorted_indices->push_back(std::make_pair(scores[i], i)); + } + } + // Sort the score pair according to the scores in descending order + std::stable_sort(sorted_indices->begin(), sorted_indices->end(), + SortScorePairDescend); + // Keep top_k scores if needed. + if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { + sorted_indices->resize(top_k); + } +} + +float BBoxArea(const float *box, const bool &normalized) { + if (box[2] < box[0] || box[3] < box[1]) { + // If coordinate values are is invalid + // (e.g. xmax < xmin or ymax < ymin), return 0. + return 0.f; + } else { + const float w = box[2] - box[0]; + const float h = box[3] - box[1]; + if (normalized) { + return w * h; + } else { + // If coordinate values are not within range [0, 1]. + return (w + 1) * (h + 1); + } + } +} + +float JaccardOverlap(const float *box1, const float *box2, + const bool &normalized) { + if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || + box2[3] < box1[1]) { + return 0.f; + } else { + const float inter_xmin = std::max(box1[0], box2[0]); + const float inter_ymin = std::max(box1[1], box2[1]); + const float inter_xmax = std::min(box1[2], box2[2]); + const float inter_ymax = std::min(box1[3], box2[3]); + float norm = normalized ? 
0.0f : 1.0f; + float inter_w = inter_xmax - inter_xmin + norm; + float inter_h = inter_ymax - inter_ymin + norm; + const float inter_area = inter_w * inter_h; + const float bbox1_area = BBoxArea(box1, normalized); + const float bbox2_area = BBoxArea(box2, normalized); + return inter_area / (bbox1_area + bbox2_area - inter_area); + } +} + +void PaddleMultiClassNMS::FastNMS(const float *boxes, const float *scores, + const int &num_boxes, + std::vector *keep_indices) { + std::vector> sorted_indices; + GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k, + &sorted_indices); + + float adaptive_threshold = nms_threshold; + while (sorted_indices.size() != 0) { + const int idx = sorted_indices.front().second; + bool keep = true; + for (size_t k = 0; k < keep_indices->size(); ++k) { + if (!keep) { + break; + } + const int kept_idx = (*keep_indices)[k]; + float overlap = + JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized); + keep = overlap <= adaptive_threshold; + } + if (keep) { + keep_indices->push_back(idx); + } + sorted_indices.erase(sorted_indices.begin()); + if (keep && nms_eta<1.0 & adaptive_threshold> 0.5) { + adaptive_threshold *= nms_eta; + } + } +} + +int PaddleMultiClassNMS::NMSForEachSample( + const float *boxes, const float *scores, int num_boxes, int num_classes, + std::map> *keep_indices) { + for (int i = 0; i < num_classes; ++i) { + if (i == background_label) { + continue; + } + const float *score_for_class_i = scores + i * num_boxes; + FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i])); + } + int num_det = 0; + for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) { + num_det += iter->second.size(); + } + + if (keep_top_k > -1 && num_det > keep_top_k) { + std::vector>> score_index_pairs; + for (const auto &it : *keep_indices) { + int label = it.first; + const float *current_score = scores + label * num_boxes; + auto &label_indices = it.second; + for (size_t j = 0; j < label_indices.size(); ++j) { + int idx = label_indices[j]; + score_index_pairs.push_back( + std::make_pair(current_score[idx], std::make_pair(label, idx))); + } + } + std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), + SortScorePairDescend>); + score_index_pairs.resize(keep_top_k); + + std::map> new_indices; + for (size_t j = 0; j < score_index_pairs.size(); ++j) { + int label = score_index_pairs[j].second.first; + int idx = score_index_pairs[j].second.second; + new_indices[label].push_back(idx); + } + new_indices.swap(*keep_indices); + num_det = keep_top_k; + } + return num_det; +} + +void PaddleMultiClassNMS::Compute(const float *boxes_data, + const float *scores_data, + const std::vector &boxes_dim, + const std::vector &scores_dim) { + int score_size = scores_dim.size(); + + int64_t batch_size = scores_dim[0]; + int64_t box_dim = boxes_dim[2]; + int64_t out_dim = box_dim + 2; + + int num_nmsed_out = 0; + FDASSERT(score_size == 3, + "Require rank of input scores be 3, but now it's %d.", score_size); + FDASSERT(boxes_dim[2] == 4, + "Require the 3-dimension of input boxes be 4, but now it's %lld.", + box_dim); + out_num_rois_data.resize(batch_size); + + std::vector>> all_indices; + for (size_t i = 0; i < batch_size; ++i) { + std::map> indices; // indices kept for each class + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr, + boxes_dim[1], 
scores_dim[1], &indices); + num_nmsed_out += num; + out_num_rois_data[i] = num; + all_indices.emplace_back(indices); + } + std::vector out_box_dims = {num_nmsed_out, 6}; + std::vector out_index_dims = {num_nmsed_out, 1}; + if (num_nmsed_out == 0) { + for (size_t i = 0; i < batch_size; ++i) { + out_num_rois_data[i] = 0; + } + return; + } + out_box_data.resize(num_nmsed_out * 6); + out_index_data.resize(num_nmsed_out); + + int count = 0; + for (size_t i = 0; i < batch_size; ++i) { + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + for (const auto &it : all_indices[i]) { + int label = it.first; + const auto &indices = it.second; + const float *current_scores_class_ptr = + current_scores_ptr + label * scores_dim[2]; + for (size_t j = 0; j < indices.size(); ++j) { + int start = count * 6; + out_box_data[start] = label; + out_box_data[start + 1] = current_scores_class_ptr[indices[j]]; + + out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4]; + out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1]; + out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2]; + + out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3]; + out_index_data[count] = i * boxes_dim[1] + indices[j]; + count += 1; + } + } + } +} +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.h new file mode 100755 index 0000000000..392cf15325 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include + +namespace ultrainfer { +namespace vision { +namespace detection { +/** \brief Config for PaddleMultiClassNMS + * \param[in] background_label the value of background label + * \param[in] keep_top_k the value of keep_top_k + * \param[in] nms_eta the value of nms_eta + * \param[in] nms_threshold a dict that contains the arguments of nms operations + * \param[in] nms_top_k if there are more than max_num bboxes after NMS, only + * top max_num will be kept. \param[in] normalized Determine whether normalized + * is required \param[in] score_threshold bbox threshold, bboxes with scores + * lower than it will not be considered. 
+ */ +struct NMSOption { + NMSOption() = default; + int64_t background_label = -1; + int64_t keep_top_k = 100; + float nms_eta = 1.0; + float nms_threshold = 0.5; + int64_t nms_top_k = 1000; + bool normalized = true; + float score_threshold = 0.3; +}; + +struct PaddleMultiClassNMS { + int64_t background_label = -1; + int64_t keep_top_k = -1; + float nms_eta; + float nms_threshold = 0.7; + int64_t nms_top_k; + bool normalized; + float score_threshold; + + std::vector out_num_rois_data; + std::vector out_index_data; + std::vector out_box_data; + void FastNMS(const float *boxes, const float *scores, const int &num_boxes, + std::vector *keep_indices); + int NMSForEachSample(const float *boxes, const float *scores, int num_boxes, + int num_classes, + std::map> *keep_indices); + void Compute(const float *boxes, const float *scores, + const std::vector &boxes_dim, + const std::vector &scores_dim); + + void SetNMSOption(const struct NMSOption &nms_option) { + background_label = nms_option.background_label; + keep_top_k = nms_option.keep_top_k; + nms_eta = nms_option.nms_eta; + nms_threshold = nms_option.nms_threshold; + nms_top_k = nms_option.nms_top_k; + normalized = nms_option.normalized; + score_threshold = nms_option.score_threshold; + } +}; +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.cc new file mode 100755 index 0000000000..f9bc1fd275 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.cc @@ -0,0 +1,468 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
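+// The rotated NMS implemented in this file mirrors PaddleMultiClassNMS from
+// multiclass_nms.h: both are configured through an option struct before
+// Compute() is called. Minimal sketch for the axis-aligned variant
+// (boxes_ptr/scores_ptr and the dims stand in for real model outputs, and
+// the thresholds are illustrative, not tuned defaults):
+//
+//   NMSOption opt;
+//   opt.score_threshold = 0.4f;  // discard low-score boxes before NMS
+//   opt.keep_top_k = 100;        // cap detections kept per image
+//   PaddleMultiClassNMS nms;
+//   nms.SetNMSOption(opt);
+//   nms.Compute(boxes_ptr, scores_ptr,
+//               {batch, num_boxes, 4},             // boxes shape
+//               {batch, num_classes, num_boxes});  // scores shape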
+ +#include "ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h" + +#include +#include +#include +#include + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/detection/ppdet/multiclass_nms.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +template struct RotatedBox { T x_ctr, y_ctr, w, h, a; }; + +template struct Point { + T x, y; + Point(const T &px = 0, const T &py = 0) : x(px), y(py) {} + Point operator+(const Point &p) const { return Point(x + p.x, y + p.y); } + Point &operator+=(const Point &p) { + x += p.x; + y += p.y; + return *this; + } + Point operator-(const Point &p) const { return Point(x - p.x, y - p.y); } + Point operator*(const T coeff) const { return Point(x * coeff, y * coeff); } +}; + +template T Dot2D(const Point &A, const Point &B) { + return A.x * B.x + A.y * B.y; +} + +template T Cross2D(const Point &A, const Point &B) { + return A.x * B.y - B.x * A.y; +} + +template +int GetIntersectionPoints(const Point (&pts1)[4], const Point (&pts2)[4], + Point (&intersections)[24]) { + // Line vector + // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] + Point vec1[4], vec2[4]; + for (int i = 0; i < 4; i++) { + vec1[i] = pts1[(i + 1) % 4] - pts1[i]; + vec2[i] = pts2[(i + 1) % 4] - pts2[i]; + } + + // Line test - test all line combos for intersection + int num = 0; // number of intersections + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + // Solve for 2x2 Ax=b + T det = Cross2D(vec2[j], vec1[i]); + + // This takes care of parallel lines + if (fabs(det) <= 1e-14) { + continue; + } + + auto vec12 = pts2[j] - pts1[i]; + + T t1 = Cross2D(vec2[j], vec12) / det; + T t2 = Cross2D(vec1[i], vec12) / det; + + if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) { + intersections[num++] = pts1[i] + vec1[i] * t1; + } + } + } + + // Check for vertices of rect1 inside rect2 + { + const auto &AB = vec2[0]; + const auto &DA = vec2[3]; + auto ABdotAB = Dot2D(AB, AB); + auto ADdotAD = Dot2D(DA, DA); + for (int i = 0; i < 4; i++) { + // assume ABCD is the rectangle, and P is the point to be judged + // P is inside ABCD iff. P's projection on AB lies within AB + // and P's projection on AD lies within AD + + auto AP = pts1[i] - pts2[0]; + + auto APdotAB = Dot2D(AP, AB); + auto APdotAD = -Dot2D(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && + (APdotAD <= ADdotAD)) { + intersections[num++] = pts1[i]; + } + } + } + + // Reverse the check - check for vertices of rect2 inside rect1 + { + const auto &AB = vec1[0]; + const auto &DA = vec1[3]; + auto ABdotAB = Dot2D(AB, AB); + auto ADdotAD = Dot2D(DA, DA); + for (int i = 0; i < 4; i++) { + auto AP = pts2[i] - pts1[0]; + + auto APdotAB = Dot2D(AP, AB); + auto APdotAD = -Dot2D(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && + (APdotAD <= ADdotAD)) { + intersections[num++] = pts2[i]; + } + } + } + + return num; +} + +template +int ConvexHullGraham(const Point (&p)[24], const int &num_in, + Point (&q)[24], bool shift_to_zero = false) { + assert(num_in >= 2); + + // Step 1: + // Find point with minimum y + // if more than 1 points have the same minimum y, + // pick the one with the minimum x. 
+ int t = 0; + for (int i = 1; i < num_in; i++) { + if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { + t = i; + } + } + auto &start = p[t]; // starting point + + // Step 2: + // Subtract starting point from every points (for sorting in the next step) + for (int i = 0; i < num_in; i++) { + q[i] = p[i] - start; + } + + // Swap the starting point to position 0 + auto tmp = q[0]; + q[0] = q[t]; + q[t] = tmp; + + // Step 3: + // Sort point 1 ~ num_in according to their relative cross-product values + // (essentially sorting according to angles) + // If the angles are the same, sort according to their distance to origin + T dist[24]; + for (int i = 0; i < num_in; i++) { + dist[i] = Dot2D(q[i], q[i]); + } + + // CPU version + std::sort(q + 1, q + num_in, + [](const Point &A, const Point &B) -> bool { + T temp = Cross2D(A, B); + if (fabs(temp) < 1e-6) { + return Dot2D(A, A) < Dot2D(B, B); + } else { + return temp > 0; + } + }); + + // Step 4: + // Make sure there are at least 2 points (that don't overlap with each other) + // in the stack + int k; // index of the non-overlapped second point + for (k = 1; k < num_in; k++) { + if (dist[k] > 1e-8) { + break; + } + } + if (k == num_in) { + // We reach the end, which means the convex hull is just one point + q[0] = p[t]; + return 1; + } + q[1] = q[k]; + int m = 2; // 2 points in the stack + // Step 5: + // Finally we can start the scanning process. + // When a non-convex relationship between the 3 points is found + // (either concave shape or duplicated points), + // we pop the previous point from the stack + // until the 3-point relationship is convex again, or + // until the stack only contains two points + for (int i = k + 1; i < num_in; i++) { + while (m > 1 && Cross2D(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) { + m--; + } + q[m++] = q[i]; + } + + // Step 6 (Optional): + // In general sense we need the original coordinates, so we + // need to shift the points back (reverting Step 2) + // But if we're only interested in getting the area/perimeter of the shape + // We can simply return. + if (!shift_to_zero) { + for (int i = 0; i < m; i++) { + q[i] += start; + } + } + + return m; +} + +template T PolygonArea(const Point (&q)[24], const int &m) { + if (m <= 2) { + return 0; + } + + T area = 0; + for (int i = 1; i < m - 1; i++) { + area += fabs(Cross2D(q[i] - q[0], q[i + 1] - q[0])); + } + + return area / 2.0; +} + +template +T RboxesIntersection(T const *const poly1_raw, T const *const poly2_raw) { + // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned + // from rotated_rect_intersection_pts + Point intersectPts[24], orderedPts[24]; + + Point pts1[4]; + + Point pts2[4]; + for (int i = 0; i < 4; i++) { + pts1[i] = Point(poly1_raw[2 * i], poly1_raw[2 * i + 1]); + pts2[i] = Point(poly2_raw[2 * i], poly2_raw[2 * i + 1]); + } + + int num = GetIntersectionPoints(pts1, pts2, intersectPts); + if (num <= 2) { + return 0.0; + } + + // Convex Hull to order the intersection points in clockwise order and find + // the contour area. 
+ int num_convex = ConvexHullGraham(intersectPts, num, orderedPts, true); + return PolygonArea(orderedPts, num_convex); +} + +template T PolyArea(T const *const poly_raw) { + T area = 0.0; + int j = 3; + for (int i = 0; i < 4; i++) { + // area += (x[j] + x[i]) * (y[j] - y[i]); + area += (poly_raw[2 * j] + poly_raw[2 * i]) * + (poly_raw[2 * j + 1] - poly_raw[2 * i + 1]); + j = i; + } + // return static_cast(abs(static_cast(area) / 2.0)); + return std::abs(area / 2.0); +} + +template +void Poly2Rbox(T const *const poly_raw, RotatedBox &box) { + std::vector contour_poly{ + cv::Point2f(poly_raw[0], poly_raw[1]), + cv::Point2f(poly_raw[2], poly_raw[3]), + cv::Point2f(poly_raw[4], poly_raw[5]), + cv::Point2f(poly_raw[6], poly_raw[7]), + }; + cv::RotatedRect rotate_rect = cv::minAreaRect(contour_poly); + box.x_ctr = rotate_rect.center.x; + box.y_ctr = rotate_rect.center.y; + box.w = rotate_rect.size.width; + box.h = rotate_rect.size.height; + box.a = rotate_rect.angle; +} + +template +T RboxIouSingle(T const *const poly1_raw, T const *const poly2_raw) { + const T area1 = PolyArea(poly1_raw); + const T area2 = PolyArea(poly2_raw); + + const T intersection = RboxesIntersection(poly1_raw, poly2_raw); + const T iou = intersection / (area1 + area2 - intersection); + return iou; +} + +template +bool SortScorePairDescendRotated(const std::pair &pair1, + const std::pair &pair2) { + return pair1.first > pair2.first; +} + +void GetMaxScoreIndexRotated( + const float *scores, const int &score_size, const float &threshold, + const int &top_k, std::vector> *sorted_indices) { + for (size_t i = 0; i < score_size; ++i) { + if (scores[i] > threshold) { + sorted_indices->push_back(std::make_pair(scores[i], i)); + } + } + // Sort the score pair according to the scores in descending order + std::stable_sort(sorted_indices->begin(), sorted_indices->end(), + SortScorePairDescendRotated); + // Keep top_k scores if needed. 
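+  // (For example, with the default nms_top_k of 2000 only the 2000
+  // highest-scoring candidates of each class enter the suppression loop.)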
+ if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { + sorted_indices->resize(top_k); + } +} + +void PaddleMultiClassNMSRotated::FastNMSRotated( + const float *boxes, const float *scores, const int &num_boxes, + std::vector *keep_indices) { + std::vector> sorted_indices; + GetMaxScoreIndexRotated(scores, num_boxes, score_threshold, nms_top_k, + &sorted_indices); + // printf("nms thrd: %f, sort dim: %d\n", nms_threshold, + // int(sorted_indices.size())); + float adaptive_threshold = nms_threshold; + while (sorted_indices.size() != 0) { + const int idx = sorted_indices.front().second; + bool keep = true; + for (size_t k = 0; k < keep_indices->size(); ++k) { + if (!keep) { + break; + } + const int kept_idx = (*keep_indices)[k]; + float overlap = + RboxIouSingle(boxes + idx * 8, boxes + kept_idx * 8); + + keep = overlap <= adaptive_threshold; + } + if (keep) { + keep_indices->push_back(idx); + } + sorted_indices.erase(sorted_indices.begin()); + if (keep && nms_eta<1.0 & adaptive_threshold> 0.5) { + adaptive_threshold *= nms_eta; + } + } +} + +int PaddleMultiClassNMSRotated::NMSRotatedForEachSample( + const float *boxes, const float *scores, int num_boxes, int num_classes, + std::map> *keep_indices) { + for (int i = 0; i < num_classes; ++i) { + if (i == background_label) { + continue; + } + const float *score_for_class_i = scores + i * num_boxes; + FastNMSRotated(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i])); + } + int num_det = 0; + for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) { + num_det += iter->second.size(); + } + + if (keep_top_k > -1 && num_det > keep_top_k) { + std::vector>> score_index_pairs; + for (const auto &it : *keep_indices) { + int label = it.first; + const float *current_score = scores + label * num_boxes; + auto &label_indices = it.second; + for (size_t j = 0; j < label_indices.size(); ++j) { + int idx = label_indices[j]; + score_index_pairs.push_back( + std::make_pair(current_score[idx], std::make_pair(label, idx))); + } + } + + std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), + SortScorePairDescendRotated>); + score_index_pairs.resize(keep_top_k); + + std::map> new_indices; + for (size_t j = 0; j < score_index_pairs.size(); ++j) { + int label = score_index_pairs[j].second.first; + int idx = score_index_pairs[j].second.second; + new_indices[label].push_back(idx); + } + new_indices.swap(*keep_indices); + num_det = keep_top_k; + } + return num_det; +} + +void PaddleMultiClassNMSRotated::Compute( + const float *boxes_data, const float *scores_data, + const std::vector &boxes_dim, + const std::vector &scores_dim) { + int score_size = scores_dim.size(); + + int64_t batch_size = scores_dim[0]; + int64_t box_dim = boxes_dim[2]; + int64_t out_dim = box_dim + 2; + + int num_nmsed_out = 0; + FDASSERT(score_size == 3, + "Require rank of input scores be 3, but now it's %d.", score_size); + FDASSERT(boxes_dim[2] == 8, + "Require the 3-dimension of input boxes be 8, but now it's %lld.", + box_dim); + out_num_rois_data.resize(batch_size); + + std::vector>> all_indices; + for (size_t i = 0; i < batch_size; ++i) { + std::map> indices; // indices kept for each class + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + int num = NMSRotatedForEachSample(current_boxes_ptr, current_scores_ptr, + boxes_dim[1], scores_dim[1], &indices); + num_nmsed_out += num; + out_num_rois_data[i] = num; + 
all_indices.emplace_back(indices); + } + std::vector out_box_dims = {num_nmsed_out, 10}; + std::vector out_index_dims = {num_nmsed_out, 1}; + if (num_nmsed_out == 0) { + for (size_t i = 0; i < batch_size; ++i) { + out_num_rois_data[i] = 0; + } + return; + } + out_box_data.resize(num_nmsed_out * 10); + out_index_data.resize(num_nmsed_out); + + int count = 0; + for (size_t i = 0; i < batch_size; ++i) { + const float *current_boxes_ptr = + boxes_data + i * boxes_dim[1] * boxes_dim[2]; + const float *current_scores_ptr = + scores_data + i * scores_dim[1] * scores_dim[2]; + for (const auto &it : all_indices[i]) { + int label = it.first; + const auto &indices = it.second; + const float *current_scores_class_ptr = + current_scores_ptr + label * scores_dim[2]; + for (size_t j = 0; j < indices.size(); ++j) { + int start = count * 10; + out_box_data[start] = label; + out_box_data[start + 1] = current_scores_class_ptr[indices[j]]; + for (int k = 0; k < 8; k++) { + out_box_data[start + 2 + k] = current_boxes_ptr[indices[j] * 8 + k]; + } + out_index_data[count] = i * boxes_dim[1] + indices[j]; + count += 1; + } + } + } +} +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h new file mode 100755 index 0000000000..279276333b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include + +namespace ultrainfer { +namespace vision { +namespace detection { +/** \brief Config for PaddleMultiClassNMSRotated + * \param[in] background_label the value of background label + * \param[in] keep_top_k the value of keep_top_k + * \param[in] nms_eta the value of nms_eta + * \param[in] nms_threshold a dict that contains the arguments of nms operations + * \param[in] nms_top_k if there are more than max_num bboxes after NMS, only + * top max_num will be kept. \param[in] normalized Determine whether normalized + * is required \param[in] score_threshold bbox threshold, bboxes with scores + * lower than it will not be considered. 
+ */ +struct NMSRotatedOption { + NMSRotatedOption() = default; + int64_t background_label = -1; + int64_t keep_top_k = -1; + float nms_eta = 1.0; + float nms_threshold = 0.1; + int64_t nms_top_k = 2000; + bool normalized = false; + float score_threshold = 0.1; +}; + +struct PaddleMultiClassNMSRotated { + int64_t background_label = -1; + int64_t keep_top_k = -1; + float nms_eta; + float nms_threshold = 0.1; + int64_t nms_top_k; + bool normalized; + float score_threshold; + + std::vector out_num_rois_data; + std::vector out_index_data; + std::vector out_box_data; + void FastNMSRotated(const float *boxes, const float *scores, + const int &num_boxes, std::vector *keep_indices); + int NMSRotatedForEachSample(const float *boxes, const float *scores, + int num_boxes, int num_classes, + std::map> *keep_indices); + void Compute(const float *ploy_boxes, const float *scores, + const std::vector &boxes_dim, + const std::vector &scores_dim); + + void SetNMSRotatedOption(const struct NMSRotatedOption &nms_rotated_option) { + background_label = nms_rotated_option.background_label; + keep_top_k = nms_rotated_option.keep_top_k; + nms_eta = nms_rotated_option.nms_eta; + nms_threshold = nms_rotated_option.nms_threshold; + nms_top_k = nms_rotated_option.nms_top_k; + normalized = nms_rotated_option.normalized; + score_threshold = nms_rotated_option.score_threshold; + } +}; +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.cc new file mode 100755 index 0000000000..e0b58d5da4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.cc @@ -0,0 +1,362 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
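+// Sketch: overriding the rotated-NMS defaults used by ProcessPPYOLOER below
+// without touching the YAML config (the arch string and threshold values are
+// illustrative only):
+//
+//   NMSRotatedOption ropt;
+//   ropt.score_threshold = 0.2f;
+//   ropt.nms_threshold = 0.15f;
+//   PaddleDetPostprocessor post(arch);  // arch comes from the preprocessor
+//   post.SetNMSRotatedOption(ropt);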
+ +#include "ultrainfer/vision/detection/ppdet/postprocessor.h" + +#include "ultrainfer/vision/utils/utils.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +bool PaddleDetPostprocessor::ProcessMask( + const FDTensor &tensor, std::vector *results) { + auto shape = tensor.Shape(); + int64_t out_mask_w = shape[2]; + int64_t out_mask_numel = shape[1] * shape[2]; + const auto *data = reinterpret_cast(tensor.CpuData()); + int index = 0; + + for (int i = 0; i < results->size(); ++i) { + (*results)[i].contain_masks = true; + (*results)[i].masks.resize((*results)[i].boxes.size()); + for (int j = 0; j < (*results)[i].boxes.size(); ++j) { + int x1 = static_cast(round((*results)[i].boxes[j][0])); + int y1 = static_cast(round((*results)[i].boxes[j][1])); + int x2 = static_cast(round((*results)[i].boxes[j][2])); + int y2 = static_cast(round((*results)[i].boxes[j][3])); + int keep_mask_h = y2 - y1; + int keep_mask_w = x2 - x1; + int keep_mask_numel = keep_mask_h * keep_mask_w; + (*results)[i].masks[j].Resize(keep_mask_numel); + (*results)[i].masks[j].shape = {keep_mask_h, keep_mask_w}; + const uint32_t *current_ptr = data + index * out_mask_numel; + + auto *keep_mask_ptr = + reinterpret_cast((*results)[i].masks[j].Data()); + for (int row = y1; row < y2; ++row) { + size_t keep_nbytes_in_col = keep_mask_w * sizeof(uint32_t); + const uint32_t *out_row_start_ptr = current_ptr + row * out_mask_w + x1; + uint32_t *keep_row_start_ptr = keep_mask_ptr + (row - y1) * keep_mask_w; + std::memcpy(keep_row_start_ptr, out_row_start_ptr, keep_nbytes_in_col); + } + index += 1; + } + } + return true; +} + +bool PaddleDetPostprocessor::ProcessWithNMS( + const std::vector &tensors, + std::vector *results) { + // Get number of boxes for each input image + std::vector num_boxes(tensors[1].shape[0]); + int total_num_boxes = 0; + if (tensors[1].dtype == FDDataType::INT32) { + const auto *data = static_cast(tensors[1].CpuData()); + for (size_t i = 0; i < tensors[1].shape[0]; ++i) { + num_boxes[i] = static_cast(data[i]); + total_num_boxes += num_boxes[i]; + } + } else if (tensors[1].dtype == FDDataType::INT64) { + const auto *data = static_cast(tensors[1].CpuData()); + for (size_t i = 0; i < tensors[1].shape[0]; ++i) { + num_boxes[i] = static_cast(data[i]); + total_num_boxes += num_boxes[i]; + } + } + + // Special case for TensorRT, it has fixed output shape of NMS + // So there's invalid boxes in its' output boxes + int num_output_boxes = static_cast(tensors[0].Shape()[0]); + bool contain_invalid_boxes = false; + if (total_num_boxes != num_output_boxes) { + if (num_output_boxes % num_boxes.size() == 0) { + contain_invalid_boxes = true; + } else { + FDERROR << "Cannot handle the output data for this model, unexpected " + "situation." 
+ << std::endl; + return false; + } + } + + // Get boxes for each input image + results->resize(num_boxes.size()); + + if (tensors[0].shape[0] == 0) { + // No detected boxes + return true; + } + + const auto *box_data = static_cast(tensors[0].CpuData()); + int offset = 0; + for (size_t i = 0; i < num_boxes.size(); ++i) { + const float *ptr = box_data + offset; + (*results)[i].Reserve(num_boxes[i]); + for (size_t j = 0; j < num_boxes[i]; ++j) { + (*results)[i].label_ids.push_back( + static_cast(round(ptr[j * 6]))); + (*results)[i].scores.push_back(ptr[j * 6 + 1]); + (*results)[i].boxes.emplace_back(std::array( + {ptr[j * 6 + 2], ptr[j * 6 + 3], ptr[j * 6 + 4], ptr[j * 6 + 5]})); + } + if (contain_invalid_boxes) { + offset += static_cast(num_output_boxes * 6 / num_boxes.size()); + } else { + offset += static_cast(num_boxes[i] * 6); + } + } + return true; +} + +bool PaddleDetPostprocessor::ProcessWithoutNMS( + const std::vector &tensors, + std::vector *results) { + int boxes_index = 0; + int scores_index = 1; + + // Judge the index of the input Tensor + if (tensors[0].shape[1] == tensors[1].shape[2]) { + boxes_index = 0; + scores_index = 1; + } else if (tensors[0].shape[2] == tensors[1].shape[1]) { + boxes_index = 1; + scores_index = 0; + } else { + FDERROR << "The shape of boxes and scores should be [batch, boxes_num, " + "4], [batch, classes_num, boxes_num]" + << std::endl; + return false; + } + + // do multi class nms + multi_class_nms_.Compute( + static_cast(tensors[boxes_index].Data()), + static_cast(tensors[scores_index].Data()), + tensors[boxes_index].shape, tensors[scores_index].shape); + auto num_boxes = multi_class_nms_.out_num_rois_data; + auto box_data = + static_cast(multi_class_nms_.out_box_data.data()); + + // Get boxes for each input image + results->resize(num_boxes.size()); + int offset = 0; + for (size_t i = 0; i < num_boxes.size(); ++i) { + const float *ptr = box_data + offset; + (*results)[i].Reserve(num_boxes[i]); + for (size_t j = 0; j < num_boxes[i]; ++j) { + (*results)[i].label_ids.push_back( + static_cast(round(ptr[j * 6]))); + (*results)[i].scores.push_back(ptr[j * 6 + 1]); + (*results)[i].boxes.emplace_back(std::array( + {ptr[j * 6 + 2], ptr[j * 6 + 3], ptr[j * 6 + 4], ptr[j * 6 + 5]})); + } + offset += (num_boxes[i] * 6); + } + + // do scale + if (GetScaleFactor()[0] != 0) { + for (auto &result : *results) { + for (auto &box : result.boxes) { + box[0] /= GetScaleFactor()[1]; + box[1] /= GetScaleFactor()[0]; + box[2] /= GetScaleFactor()[1]; + box[3] /= GetScaleFactor()[0]; + } + } + } + return true; +} + +bool PaddleDetPostprocessor::ProcessSolov2( + const std::vector &tensors, + std::vector *results) { + if (tensors.size() != 4) { + FDERROR << "The size of tensors for solov2 must be 4." << std::endl; + return false; + } + + if (tensors[0].shape[0] != 1) { + FDERROR << "SOLOv2 temporarily only supports batch size is 1." << std::endl; + return false; + } + + results->clear(); + results->resize(1); + + (*results)[0].contain_masks = true; + + // tensor[0] means bbox data + const auto bbox_data = static_cast(tensors[0].CpuData()); + // tensor[1] means label data + const auto label_data_ = static_cast(tensors[1].CpuData()); + // tensor[2] means score data + const auto score_data_ = static_cast(tensors[2].CpuData()); + // tensor[3] is mask data and its shape is the same as that of the image. 
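+  // (Shape note: tensors[3] is laid out as [num_instances, H, W]; each
+  //  instance's box is recovered below from the first/last rows and columns
+  //  of its mask that contain foreground pixels.)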
+ const auto mask_data_ = static_cast(tensors[3].CpuData()); + + int rows = static_cast(tensors[3].shape[1]); + int cols = static_cast(tensors[3].shape[2]); + for (int bbox_id = 0; bbox_id < bbox_data[0]; ++bbox_id) { + if (score_data_[bbox_id] >= multi_class_nms_.score_threshold) { + DetectionResult &result_item = (*results)[0]; + result_item.label_ids.emplace_back(label_data_[bbox_id]); + result_item.scores.emplace_back(score_data_[bbox_id]); + + std::vector global_mask; + + for (int k = 0; k < rows * cols; ++k) { + global_mask.push_back( + static_cast(mask_data_[k + bbox_id * rows * cols])); + } + + // find minimize bounding box from mask + cv::Mat mask(rows, cols, CV_32SC1); + + std::memcpy(mask.data, global_mask.data(), + global_mask.size() * sizeof(int)); + + cv::Mat mask_fp; + mask.convertTo(mask_fp, CV_32FC1); + + cv::Mat rowSum; + cv::Mat colSum; + std::vector sum_of_row(rows); + std::vector sum_of_col(cols); + cv::reduce(mask_fp, colSum, 0, cv::REDUCE_SUM, CV_32FC1); + cv::reduce(mask_fp, rowSum, 1, cv::REDUCE_SUM, CV_32FC1); + + for (int row_id = 0; row_id < rows; ++row_id) { + sum_of_row[row_id] = rowSum.at(row_id, 0); + } + for (int col_id = 0; col_id < cols; ++col_id) { + sum_of_col[col_id] = colSum.at(0, col_id); + } + + auto it = std::find_if(sum_of_row.begin(), sum_of_row.end(), + [](int x) { return x > 0.5; }); + float y1 = std::distance(sum_of_row.begin(), it); + auto it2 = std::find_if(sum_of_col.begin(), sum_of_col.end(), + [](int x) { return x > 0.5; }); + float x1 = std::distance(sum_of_col.begin(), it2); + auto rit = std::find_if(sum_of_row.rbegin(), sum_of_row.rend(), + [](int x) { return x > 0.5; }); + float y2 = std::distance(rit, sum_of_row.rend()); + auto rit2 = std::find_if(sum_of_col.rbegin(), sum_of_col.rend(), + [](int x) { return x > 0.5; }); + float x2 = std::distance(rit2, sum_of_col.rend()); + result_item.boxes.emplace_back(std::array({x1, y1, x2, y2})); + } + } + return true; +} + +bool PaddleDetPostprocessor::ProcessPPYOLOER( + const std::vector &tensors, + std::vector *results) { + if (tensors.size() != 2) { + FDERROR << "The size of tensors for PPYOLOER must be 2." << std::endl; + return false; + } + + int boxes_index = 0; + int scores_index = 1; + multi_class_nms_rotated_.Compute( + static_cast(tensors[boxes_index].Data()), + static_cast(tensors[scores_index].Data()), + tensors[boxes_index].shape, tensors[scores_index].shape); + auto num_boxes = multi_class_nms_rotated_.out_num_rois_data; + auto box_data = + static_cast(multi_class_nms_rotated_.out_box_data.data()); + + // Get boxes for each input image + results->resize(num_boxes.size()); + int offset = 0; + for (size_t i = 0; i < num_boxes.size(); ++i) { + const float *ptr = box_data + offset; + (*results)[i].Reserve(num_boxes[i]); + for (size_t j = 0; j < num_boxes[i]; ++j) { + (*results)[i].label_ids.push_back( + static_cast(round(ptr[j * 10]))); + (*results)[i].scores.push_back(ptr[j * 10 + 1]); + (*results)[i].rotated_boxes.push_back(std::array( + {ptr[j * 10 + 2], ptr[j * 10 + 3], ptr[j * 10 + 4], ptr[j * 10 + 5], + ptr[j * 10 + 6], ptr[j * 10 + 7], ptr[j * 10 + 8], + ptr[j * 10 + 9]})); + } + offset += (num_boxes[i] * 10); + } + + // do scale + if (GetScaleFactor()[0] != 0) { + for (auto &result : *results) { + for (int i = 0; i < result.rotated_boxes.size(); i++) { + for (int j = 0; j < 8; j++) { + auto scale = i % 2 == 0 ? 
GetScaleFactor()[1] : GetScaleFactor()[0];
+          result.rotated_boxes[i][j] /= float(scale);
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+bool PaddleDetPostprocessor::Run(const std::vector<FDTensor> &tensors,
+                                 std::vector<DetectionResult> *results) {
+  if (arch_ == "SOLOv2") {
+    // process for SOLOv2
+    ProcessSolov2(tensors, results);
+    // The fourth output of SOLOv2 is the mask
+    return ProcessMask(tensors[3], results);
+  } else {
+    if (tensors[0].Shape().size() == 3 &&
+        tensors[0].Shape()[2] == 8) { // PPYOLOER
+      return ProcessPPYOLOER(tensors, results);
+    }
+
+    // Dispatch according to whether the model was exported with NMS.
+    if (with_nms_) {
+      if (!ProcessWithNMS(tensors, results)) {
+        return false;
+      }
+    } else {
+      if (!ProcessWithoutNMS(tensors, results)) {
+        return false;
+      }
+    }
+
+    // for detection-only models
+    if (tensors.size() <= 2) {
+      return true;
+    }
+
+    // for Mask R-CNN
+    if (tensors[2].Shape()[0] != tensors[0].Shape()[0]) {
+      FDERROR << "The first dimension of the output mask tensor: "
+              << tensors[2].Shape()[0]
+              << " is not equal to the first dimension of the output boxes "
+                 "tensor: "
+              << tensors[0].Shape()[0] << "." << std::endl;
+      return false;
+    }
+
+    // The third output of Mask R-CNN is the mask
+    return ProcessMask(tensors[2], results);
+  }
+}
+} // namespace detection
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.h
new file mode 100755
index 0000000000..fb1d538d41
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/postprocessor.h
@@ -0,0 +1,117 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+#include "ultrainfer/vision/detection/ppdet/multiclass_nms.h"
+#include "ultrainfer/vision/detection/ppdet/multiclass_nms_rotated.h"
+
+namespace ultrainfer {
+namespace vision {
+namespace detection {
+/*! @brief Postprocessor object for the PaddleDet series of models.
+ */
+class ULTRAINFER_DECL PaddleDetPostprocessor {
+public:
+  PaddleDetPostprocessor() {
+    // There may be no NMS config in the yaml file,
+    // so we need to give an initial value to multi_class_nms_.
+    multi_class_nms_.SetNMSOption(NMSOption());
+    multi_class_nms_rotated_.SetNMSRotatedOption(NMSRotatedOption());
+  }
+
+  /** \brief Create a postprocessor instance for the PaddleDet series of models
+   *
+   * \param[in] arch Model architecture read from the deployment configuration
+   * file, e.g. ppyoloe/infer_cfg.yml
+   */
+  explicit PaddleDetPostprocessor(const std::string &arch) {
+    // Used to differentiate models
+    arch_ = arch;
+    // There may be no NMS config in the yaml file,
+    // so we need to give an initial value to multi_class_nms_.
+    multi_class_nms_.SetNMSOption(NMSOption());
+    multi_class_nms_rotated_.SetNMSRotatedOption(NMSRotatedOption());
+  }
+
+  /** \brief Process the result of runtime and fill it into DetectionResult
+   * structures
+   *
+   * \param[in] tensors The inference result from runtime
+   * \param[in] result The output detection results
+   * \return true if the postprocess succeeds, otherwise false
+   */
+  bool Run(const std::vector<FDTensor> &tensors,
+           std::vector<DetectionResult> *result);
+
+  /// Apply box decoding and the NMS step to the model outputs. This is
+  /// only available for models exported without box decoding and NMS.
+  void ApplyNMS() { with_nms_ = false; }
+
+  /// If you do not want to modify the Yaml configuration file,
+  /// you can use this function to set rotated NMS parameters.
+  void SetNMSRotatedOption(const NMSRotatedOption &option) {
+    multi_class_nms_rotated_.SetNMSRotatedOption(option);
+  }
+
+  /// If you do not want to modify the Yaml configuration file,
+  /// you can use this function to set NMS parameters.
+  void SetNMSOption(const NMSOption &option) {
+    multi_class_nms_.SetNMSOption(option);
+  }
+
+  // Set the scale_factor_ value. This is only available for models exported
+  // without NMS.
+  void SetScaleFactor(const std::vector<float> &scale_factor_value) {
+    scale_factor_ = scale_factor_value;
+  }
+
+private:
+  std::vector<float> scale_factor_{0.0, 0.0};
+  std::vector<float> GetScaleFactor() { return scale_factor_; }
+
+  // for models without NMS
+  bool with_nms_ = true;
+
+  // Used to differentiate models
+  std::string arch_;
+
+  PaddleMultiClassNMS multi_class_nms_{};
+
+  PaddleMultiClassNMSRotated multi_class_nms_rotated_{};
+
+  // Process general tensors without NMS.
+  bool ProcessWithoutNMS(const std::vector<FDTensor> &tensors,
+                         std::vector<DetectionResult> *results);
+
+  // Process general tensors with NMS.
+  bool ProcessWithNMS(const std::vector<FDTensor> &tensors,
+                      std::vector<DetectionResult> *results);
+
+  // Process SOLOv2
+  bool ProcessSolov2(const std::vector<FDTensor> &tensors,
+                     std::vector<DetectionResult> *results);
+
+  // Process PPYOLOER
+  bool ProcessPPYOLOER(const std::vector<FDTensor> &tensors,
+                       std::vector<DetectionResult> *results);
+
+  // Process the mask tensor for Mask R-CNN
+  bool ProcessMask(const FDTensor &tensor,
+                   std::vector<DetectionResult> *results);
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/ppdet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/ppdet_pybind.cc
new file mode 100755
index 0000000000..47120a2fc4
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/ppdet_pybind.cc
@@ -0,0 +1,268 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
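A minimal usage sketch of the postprocessor declared above, under assumptions: the model is exported without box decoding/NMS, `infer_outputs` and `scale_factor` are produced elsewhere by the runtime and preprocessor, and the "YOLO" arch string is only illustrative. Only members shown in the header (SetNMSOption, ApplyNMS, SetScaleFactor, Run) are used.

#include <vector>

#include "ultrainfer/vision/detection/ppdet/postprocessor.h"

// Run detection postprocessing on raw runtime outputs (sketch only).
bool PostprocessSketch(const std::vector<ultrainfer::FDTensor> &infer_outputs,
                       const std::vector<float> &scale_factor,
                       std::vector<ultrainfer::vision::DetectionResult> *out) {
  namespace det = ultrainfer::vision::detection;
  det::PaddleDetPostprocessor postprocessor("YOLO");  // arch value is illustrative

  // Override the YAML NMS settings programmatically.
  det::NMSOption option;
  option.score_threshold = 0.3f;
  option.nms_threshold = 0.5f;
  postprocessor.SetNMSOption(option);

  // The model is assumed to be exported without box decoding + NMS, so the
  // postprocessor applies them and rescales boxes to the original image size.
  postprocessor.ApplyNMS();
  postprocessor.SetScaleFactor(scale_factor);

  return postprocessor.Run(infer_outputs, out);
}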
+#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPDet(pybind11::module &m) { + pybind11::class_(m, "PaddleDetPreprocessor") + .def(pybind11::init()) + .def("run", + [](vision::detection::PaddleDetPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "PaddleDetPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def("disable_normalize", + [](vision::detection::PaddleDetPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::detection::PaddleDetPreprocessor &self) { + self.DisablePermute(); + }); + + pybind11::class_(m, "NMSOption") + .def(pybind11::init()) + .def_readwrite("background_label", + &vision::detection::NMSOption::background_label) + .def_readwrite("keep_top_k", &vision::detection::NMSOption::keep_top_k) + .def_readwrite("nms_eta", &vision::detection::NMSOption::nms_eta) + .def_readwrite("nms_threshold", + &vision::detection::NMSOption::nms_threshold) + .def_readwrite("nms_top_k", &vision::detection::NMSOption::nms_top_k) + .def_readwrite("normalized", &vision::detection::NMSOption::normalized) + .def_readwrite("score_threshold", + &vision::detection::NMSOption::score_threshold); + + pybind11::class_( + m, "PaddleDetPostprocessor") + .def(pybind11::init<>()) + .def(pybind11::init()) + .def("run", + [](vision::detection::PaddleDetPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleDetPostprocessor."); + } + return results; + }) + .def("set_nms_option", + [](vision::detection::PaddleDetPostprocessor &self, + vision::detection::NMSOption option) { + self.SetNMSOption(option); + }) + .def("set_nms_rotated_option", + [](vision::detection::PaddleDetPostprocessor &self, + vision::detection::NMSRotatedOption option) { + self.SetNMSRotatedOption(option); + }) + .def("apply_nms", + [](vision::detection::PaddleDetPostprocessor &self) { + self.ApplyNMS(); + }) + .def("run", [](vision::detection::PaddleDetPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleDetPostprocessor."); + } + return results; + }); + + pybind11::class_(m, + "PPDetBase") + .def(pybind11::init()) + .def("predict", + [](vision::detection::PPDetBase &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](vision::detection::PPDetBase &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def("clone", + [](vision::detection::PPDetBase &self) { return self.Clone(); }) + .def_property_readonly("preprocessor", + &vision::detection::PPDetBase::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::detection::PPDetBase::GetPostprocessor); + + pybind11::class_( + 
m, "PPYOLO") + .def(pybind11::init()); + + pybind11::class_( + m, "PPYOLOE") + .def(pybind11::init()); + + pybind11::class_( + m, "PicoDet") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleYOLOX") + .def(pybind11::init()); + + pybind11::class_( + m, "FasterRCNN") + .def(pybind11::init()); + + pybind11::class_( + m, "YOLOv3") + .def(pybind11::init()); + + pybind11::class_( + m, "MaskRCNN") + .def(pybind11::init()); + + pybind11::class_(m, + "SSD") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleYOLOv5") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleYOLOv6") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleYOLOv7") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleYOLOv8") + .def(pybind11::init()); + + pybind11::class_( + m, "RTMDet") + .def(pybind11::init()); + + pybind11::class_(m, "CascadeRCNN") + .def(pybind11::init()); + + pybind11::class_( + m, "PSSDet") + .def(pybind11::init()); + + pybind11::class_( + m, "RetinaNet") + .def(pybind11::init()); + + pybind11::class_( + m, "PPYOLOESOD") + .def(pybind11::init()); + + pybind11::class_( + m, "FCOS") + .def(pybind11::init()); + + pybind11::class_( + m, "TTFNet") + .def(pybind11::init()); + + pybind11::class_( + m, "TOOD") + .def(pybind11::init()); + + pybind11::class_(m, + "GFL") + .def(pybind11::init()); + + pybind11::class_( + m, "SOLOv2") + .def(pybind11::init()); + + pybind11::class_(m, "PaddleDetectionModel") + .def(pybind11::init()); + + pybind11::class_( + m, "PPYOLOER") + .def(pybind11::init()); + + pybind11::class_(m, "NMSRotatedOption") + .def(pybind11::init()) + .def_readwrite("background_label", + &vision::detection::NMSRotatedOption::background_label) + .def_readwrite("keep_top_k", + &vision::detection::NMSRotatedOption::keep_top_k) + .def_readwrite("nms_eta", &vision::detection::NMSRotatedOption::nms_eta) + .def_readwrite("nms_threshold", + &vision::detection::NMSRotatedOption::nms_threshold) + .def_readwrite("nms_top_k", + &vision::detection::NMSRotatedOption::nms_top_k) + .def_readwrite("normalized", + &vision::detection::NMSRotatedOption::normalized) + .def_readwrite("score_threshold", + &vision::detection::NMSRotatedOption::score_threshold); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.cc new file mode 100755 index 0000000000..87153c78a8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.cc @@ -0,0 +1,228 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/detection/ppdet/preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "ultrainfer/function/pad.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace detection { + +PaddleDetPreprocessor::PaddleDetPreprocessor(const std::string &config_file) { + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create PaddleDetPreprocessor."); + initialized_ = true; +} + +bool PaddleDetPreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + + // read for postprocess + if (cfg["arch"].IsDefined()) { + arch_ = cfg["arch"].as(); + } else { + FDERROR << "Please set model arch," + << "support value : SOLOv2, YOLO, SSD, RetinaNet, RCNN, Face." + << std::endl; + return false; + } + + // read for preprocess + processors_.push_back(std::make_shared()); + + bool has_permute = false; + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "NormalizeImage") { + if (!disable_normalize_) { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = true; + if (op["is_scale"]) { + is_scale = op["is_scale"].as(); + } + std::string norm_type = "mean_std"; + if (op["norm_type"]) { + norm_type = op["norm_type"].as(); + } + if (norm_type != "mean_std") { + std::fill(mean.begin(), mean.end(), 0.0); + std::fill(std.begin(), std.end(), 1.0); + } + processors_.push_back(std::make_shared(mean, std, is_scale)); + } + } else if (op_name == "Resize") { + bool keep_ratio = op["keep_ratio"].as(); + auto target_size = op["target_size"].as>(); + int interp = op["interp"].as(); + FDASSERT(target_size.size() == 2, + "Require size of target_size be 2, but now it's %lu.", + target_size.size()); + if (!keep_ratio) { + int width = target_size[1]; + int height = target_size[0]; + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, interp, false)); + } else { + int min_target_size = std::min(target_size[0], target_size[1]); + int max_target_size = std::max(target_size[0], target_size[1]); + std::vector max_size; + if (max_target_size > 0) { + max_size.push_back(max_target_size); + max_size.push_back(max_target_size); + } + processors_.push_back(std::make_shared( + min_target_size, interp, true, max_size)); + } + } else if (op_name == "Permute") { + // Do nothing, do permute as the last operation + has_permute = true; + continue; + } else if (op_name == "Pad") { + auto size = op["size"].as>(); + auto value = op["fill_value"].as>(); + processors_.push_back( + std::make_shared(size[1], size[0], value)); + } else if (op_name == "PadStride") { + auto stride = op["stride"].as(); + processors_.push_back( + std::make_shared(stride, std::vector(3, 0))); + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + if (!disable_permute_) { + if (has_permute) { + // permute = cast + HWC2CHW + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + } + } + + // Fusion will improve performance + FuseTransforms(&processors_); + + return true; +} + +bool PaddleDetPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." 
<< std::endl; + return false; + } + if (image_batch->mats->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + + // There are 3 outputs, image, scale_factor, im_shape + // But im_shape is not used for all the PaddleDetection models + // So preprocessor will output the 3 FDTensors, and how to use `im_shape` + // is decided by the model itself + outputs->resize(3); + int batch = static_cast(image_batch->mats->size()); + // Allocate memory for scale_factor + (*outputs)[1].Resize({batch, 2}, FDDataType::FP32); + // Allocate memory for im_shape + (*outputs)[2].Resize({batch, 2}, FDDataType::FP32); + // Record the max size for a batch of input image + // All the tensor will pad to the max size to compose a batched tensor + std::vector max_hw({-1, -1}); + + auto *scale_factor_ptr = + reinterpret_cast((*outputs)[1].MutableData()); + auto *im_shape_ptr = reinterpret_cast((*outputs)[2].MutableData()); + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + int origin_w = mat->Width(); + int origin_h = mat->Height(); + scale_factor_ptr[2 * i] = 1.0; + scale_factor_ptr[2 * i + 1] = 1.0; + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(mat)) { + FDERROR << "Failed to processs image:" << i << " in " + << processors_[j]->Name() << "." << std::endl; + return false; + } + if (processors_[j]->Name().find("Resize") != std::string::npos) { + scale_factor_ptr[2 * i] = mat->Height() * 1.0 / origin_h; + scale_factor_ptr[2 * i + 1] = mat->Width() * 1.0 / origin_w; + } + } + if (mat->Height() > max_hw[0]) { + max_hw[0] = mat->Height(); + } + if (mat->Width() > max_hw[1]) { + max_hw[1] = mat->Width(); + } + im_shape_ptr[2 * i] = max_hw[0]; + im_shape_ptr[2 * i + 1] = max_hw[1]; + } + + // if the size of image less than max_hw, pad to max_hw + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + if (mat->Height() < max_hw[0] || mat->Width() < max_hw[1]) { + pad_op_->SetWidthHeight(max_hw[1], max_hw[0]); + (*pad_op_)(mat); + } + } + + // Get the NCHW tensor + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + + return true; +} + +void PaddleDetPreprocessor::DisableNormalize() { + this->disable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} + +void PaddleDetPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} +} // namespace detection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.h new file mode 100755 index 0000000000..b68809d99d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/detection/ppdet/preprocessor.h @@ -0,0 +1,71 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/vision/common/processors/manager.h"
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+namespace vision {
+
+namespace detection {
+/*! @brief Preprocessor object for the PaddleDet series of models.
+ */
+class ULTRAINFER_DECL PaddleDetPreprocessor : public ProcessorManager {
+public:
+  PaddleDetPreprocessor() = default;
+  /** \brief Create a preprocessor instance for the PaddleDet series of models
+   *
+   * \param[in] config_file Path of configuration file for deployment, e.g.
+   * ppyoloe/infer_cfg.yml
+   */
+  explicit PaddleDetPreprocessor(const std::string &config_file);
+
+  /** \brief Implement the virtual function of ProcessorManager. Apply() is the
+   * body of Run(): it contains the main preprocessing logic, while Run() is
+   * what users call to execute preprocessing.
+   *
+   * \param[in] image_batch The input image batch
+   * \param[in] outputs The output tensors which will be fed into runtime
+   * \return true if the preprocess succeeds, otherwise false
+   */
+  virtual bool Apply(FDMatBatch *image_batch, std::vector<FDTensor> *outputs);
+
+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize();
+  /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute();
+
+  std::string GetArch() { return arch_; }
+
+private:
+  bool BuildPreprocessPipelineFromConfig();
+  std::vector<std::shared_ptr<Processor>> processors_;
+  std::shared_ptr<PadToSize> pad_op_ =
+      std::make_shared<PadToSize>(0, 0, std::vector<float>(3, 0));
+  bool initialized_ = false;
+  // for recording the switch of hwc2chw
+  bool disable_permute_ = false;
+  // for recording the switch of normalize
+  bool disable_normalize_ = false;
+  // read config file
+  std::string config_file_;
+  // read arch_ for postprocess
+  std::string arch_;
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.cc b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.cc
new file mode 100755
index 0000000000..a0fc686dd2
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.cc
@@ -0,0 +1,134 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
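A sketch, under assumptions, of how the scale_factor tensor that the preprocessor above documents as its second output (shape {batch, 2}, float32) could be handed to the postprocessor for a model exported without NMS; the helper name is illustrative and not part of the patch.

#include <vector>

#include "ultrainfer/vision/detection/ppdet/postprocessor.h"
#include "ultrainfer/vision/detection/ppdet/preprocessor.h"

// Forward the (h_scale, w_scale) pair of image `i` from the preprocessor's
// second output tensor to a postprocessor configured for a no-NMS model.
void ForwardScaleFactor(std::vector<ultrainfer::FDTensor> &pre_outputs, size_t i,
                        ultrainfer::vision::detection::PaddleDetPostprocessor
                            *postprocessor) {
  const float *scale_ptr =
      static_cast<const float *>(pre_outputs[1].Data());
  std::vector<float> scale_factor{scale_ptr[2 * i], scale_ptr[2 * i + 1]};
  postprocessor->SetScaleFactor(scale_factor);
}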
+ +#include "ultrainfer/vision/facealign/contrib/face_landmark_1000.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facealign { + +FaceLandmark1000::FaceLandmark1000(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool FaceLandmark1000::Initialize() { + // parameters for preprocess + size_ = {128, 128}; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool FaceLandmark1000::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // Resize + int resize_w = size_[0]; + int resize_h = size_[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + // BRG2GRAY + BGR2GRAY::Run(mat); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool FaceLandmark1000::Postprocess( + FDTensor &infer_result, FaceAlignmentResult *result, + const std::map> &im_info) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + auto iter_in = im_info.find("input_shape"); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); + int in_h = iter_in->second[0]; + int in_w = iter_in->second[1]; + + result->Clear(); + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; i += 2) { + float x = data[i]; + float y = data[i + 1]; + x = std::min(std::max(0.f, x), 1.0f); + y = std::min(std::max(0.f, y), 1.0f); + // decode landmarks (default 106 landmarks) + result->landmarks.emplace_back(std::array{x * in_w, y * in_h}); + } + + return true; +} + +bool FaceLandmark1000::Predict(cv::Mat *im, FaceAlignmentResult *result) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + if (!Postprocess(output_tensors[0], result, im_info)) { + FDERROR << "Failed to post process." 
<< std::endl; + return false; + } + return true; +} + +} // namespace facealign +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.h b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.h new file mode 100755 index 0000000000..231c36c3d6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000.h @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facealign { +/*! @brief FaceLandmark1000 model object used when to load a FaceLandmark1000 + * model exported by FaceLandmark1000. + */ +class ULTRAINFER_DECL FaceLandmark1000 : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./face_landmarks_1000.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + FaceLandmark1000(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "FaceLandmark1000"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceAlignmentResult *result); + + /** \brief Get the input size of image + * + * \return Vector of int values, default {128,128} + */ + std::vector GetSize() { return size_; } + /** \brief Set the input size of image + * + * \param[in] size Vector of int values which represents {width, height} of + * image + */ + void SetSize(const std::vector &size) { size_ = size; } + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, FaceAlignmentResult *result, + const std::map> &im_info); + // tuple of (width, height), default (128, 128) + std::vector size_; +}; + +} // namespace facealign +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000_pybind.cc 
b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000_pybind.cc new file mode 100755 index 0000000000..8aae69945a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/face_landmark_1000_pybind.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindFaceLandmark1000(pybind11::module &m) { + pybind11::class_( + m, "FaceLandmark1000") + .def(pybind11::init()) + .def( + "predict", + [](vision::facealign::FaceLandmark1000 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceAlignmentResult res; + self.Predict(&mat, &res); + return res; + }) + .def_property("size", &vision::facealign::FaceLandmark1000::GetSize, + &vision::facealign::FaceLandmark1000::SetSize); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.cc b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.cc new file mode 100755 index 0000000000..261f1ac95a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.cc @@ -0,0 +1,135 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facealign/contrib/pfld.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facealign { + +PFLD::PFLD(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool PFLD::Initialize() { + // parameters for preprocess + size = {112, 112}; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool PFLD::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // Resize + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + // Normalize + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool PFLD::Postprocess( + FDTensor &infer_result, FaceAlignmentResult *result, + const std::map> &im_info) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + auto iter_in = im_info.find("input_shape"); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); + int in_h = iter_in->second[0]; + int in_w = iter_in->second[1]; + + result->Clear(); + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; i += 2) { + float x = data[i]; + float y = data[i + 1]; + x = std::min(std::max(0.f, x), 1.0f); + y = std::min(std::max(0.f, y), 1.0f); + // decode landmarks (default 106 landmarks) + result->landmarks.emplace_back(std::array{x * in_w, y * in_h}); + } + + return true; +} + +bool PFLD::Predict(cv::Mat *im, FaceAlignmentResult *result) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors[1], result, im_info)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace facealign +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.h b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.h new file mode 100755 index 0000000000..9bf0e59dc0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld.h @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+#include "ultrainfer/ultrainfer_model.h"
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+
+namespace vision {
+
+namespace facealign {
+/*! @brief PFLD model object, used to load a face alignment model exported by
+ * PFLD.
+ */
+class ULTRAINFER_DECL PFLD : public UltraInferModel {
+public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./pfld.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams;
+   * if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use
+   * CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX
+   * format
+   */
+  PFLD(const std::string &model_file, const std::string &params_file = "",
+       const RuntimeOption &custom_option = RuntimeOption(),
+       const ModelFormat &model_format = ModelFormat::ONNX);
+
+  std::string ModelName() const { return "PFLD"; }
+  /** \brief Predict the face alignment result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D
+   * array with layout HWC, BGR format
+   * \param[in] result The output face alignment result will be written to
+   * this structure
+   * \return true if the prediction succeeds, otherwise false
+   */
+  virtual bool Predict(cv::Mat *im, FaceAlignmentResult *result);
+
+  /// tuple of (width, height), default (112, 112)
+  std::vector<int> size;
+
+private:
+  bool Initialize();
+
+  bool Preprocess(Mat *mat, FDTensor *outputs,
+                  std::map<std::string, std::array<int, 2>> *im_info);
+
+  bool Postprocess(FDTensor &infer_result, FaceAlignmentResult *result,
+                   const std::map<std::string, std::array<int, 2>> &im_info);
+};
+
+} // namespace facealign
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld_pybind.cc
new file mode 100755
index 0000000000..e3f7e5e78d
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pfld_pybind.cc
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
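A usage sketch for the PFLD class declared above; the file paths are placeholders, the constructor's default RuntimeOption and ONNX model format are used, and only members visible in this header are called.

#include <cstdio>

#include <opencv2/opencv.hpp>

#include "ultrainfer/vision/facealign/contrib/pfld.h"

int main() {
  // Placeholder model path; defaults to CPU inference with the ONNX format.
  ultrainfer::vision::facealign::PFLD model("./pfld.onnx");

  cv::Mat image = cv::imread("face.jpg");
  ultrainfer::vision::FaceAlignmentResult result;
  if (!model.Predict(&image, &result)) {
    std::fprintf(stderr, "Prediction failed.\n");
    return -1;
  }

  // `landmarks` holds (x, y) pairs in pixel coordinates of the input image.
  for (const auto &pt : result.landmarks) {
    std::printf("landmark: %.1f, %.1f\n", pt[0], pt[1]);
  }
  return 0;
}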
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPFLD(pybind11::module &m) { + pybind11::class_(m, "PFLD") + .def(pybind11::init()) + .def("predict", + [](vision::facealign::PFLD &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceAlignmentResult res; + self.Predict(&mat, &res); + return res; + }) + .def_readwrite("size", &vision::facealign::PFLD::size); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.cc b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.cc new file mode 100755 index 0000000000..6caceeece4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.cc @@ -0,0 +1,687 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facealign/contrib/pipnet.h" + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facealign { + +void PIPNet::GenerateLandmarks(std::vector &infer_result, + FaceAlignmentResult *result, float img_height, + float img_width) { + FDTensor outputs_cls = infer_result.at(0); + FDTensor outputs_x = infer_result.at(1); + FDTensor outputs_y = infer_result.at(2); + FDTensor outputs_nb_x = infer_result.at(3); + FDTensor outputs_nb_y = infer_result.at(4); + int grid_h = outputs_cls.shape[2]; // 8 + int grid_w = outputs_cls.shape[3]; // 8 + int grid_length = grid_h * grid_w; // 8 * 8 = 64 + int input_h = size_[1]; + int input_w = size_[0]; + // fetch data from pointers + const float *outputs_cls_ptr = static_cast(outputs_cls.Data()); + const float *outputs_x_ptr = static_cast(outputs_x.Data()); + const float *outputs_y_ptr = static_cast(outputs_y.Data()); + const float *outputs_nb_x_ptr = static_cast(outputs_nb_x.Data()); + const float *outputs_nb_y_ptr = static_cast(outputs_nb_y.Data()); + + // find max_ids + std::vector max_ids(num_landmarks_); + for (unsigned int i = 0; i < num_landmarks_; ++i) { + const float *score_ptr = outputs_cls_ptr + i * grid_length; + unsigned int max_id = 0; + float max_score = score_ptr[0]; + for (unsigned int j = 0; j < grid_length; ++j) { + if (score_ptr[j] > max_score) { + max_score = score_ptr[j]; + max_id = j; + } + } + max_ids[i] = max_id; // range 0~64 + } + // find x & y offsets + std::vector output_x_select(num_landmarks_); + std::vector output_y_select(num_landmarks_); + for (unsigned int i = 0; i < num_landmarks_; ++i) { + const float *offset_x_ptr = outputs_x_ptr + i * grid_length; + const float *offset_y_ptr = outputs_y_ptr + i * grid_length; + const unsigned int max_id = max_ids.at(i); + output_x_select[i] = offset_x_ptr[max_id]; + output_y_select[i] = offset_y_ptr[max_id]; + } + + // find nb_x & nb_y offsets + std::map> output_nb_x_select; + std::map> output_nb_y_select; + // initialize offsets map + for (unsigned int i = 0; i < num_landmarks_; ++i) { + std::vector nb_x_offset(num_nb_); + std::vector nb_y_offset(num_nb_); + 
output_nb_x_select[i] = nb_x_offset; + output_nb_y_select[i] = nb_y_offset; + } + for (unsigned int i = 0; i < num_landmarks_; ++i) { + for (unsigned int j = 0; j < num_nb_; ++j) { + const unsigned int max_id = max_ids.at(i); + const float *offset_nb_x_ptr = + outputs_nb_x_ptr + (i * num_nb_ + j) * grid_length; + const float *offset_nb_y_ptr = + outputs_nb_y_ptr + (i * num_nb_ + j) * grid_length; + output_nb_x_select[i][j] = offset_nb_x_ptr[max_id]; + output_nb_y_select[i][j] = offset_nb_y_ptr[max_id]; + } + } + + // calculate coords + std::vector lms_pred_x(num_landmarks_); // 19 + std::vector lms_pred_y(num_landmarks_); // 19 + std::map> lms_pred_nb_x; // 19,10 + std::map> lms_pred_nb_y; // 19,10 + + // initialize pred maps + for (unsigned int i = 0; i < num_landmarks_; ++i) { + std::vector nb_x_offset(num_nb_); + std::vector nb_y_offset(num_nb_); + lms_pred_nb_x[i] = nb_x_offset; + lms_pred_nb_y[i] = nb_y_offset; + } + for (unsigned int i = 0; i < num_landmarks_; ++i) { + float cx = static_cast(max_ids.at(i) % grid_w); + float cy = static_cast(max_ids.at(i) / grid_w); + // calculate coords & normalize + lms_pred_x[i] = + ((cx + output_x_select[i]) * (float)net_stride_) / (float)input_w; + lms_pred_y[i] = + ((cy + output_y_select[i]) * (float)net_stride_) / (float)input_h; + for (unsigned int j = 0; j < num_nb_; ++j) { + lms_pred_nb_x[i][j] = + ((cx + output_nb_x_select[i][j]) * (float)net_stride_) / + (float)input_w; + lms_pred_nb_y[i][j] = + ((cy + output_nb_y_select[i][j]) * (float)net_stride_) / + (float)input_h; + } + } + + // reverse indexes + std::map> + tmp_nb_x; // 19,max_len_map_[num_landmarks_] + std::map> + tmp_nb_y; // 19,max_len_map_[num_landmarks_] + // initialize reverse maps + for (unsigned int i = 0; i < num_landmarks_; ++i) { + std::vector tmp_x(max_len_map_[num_landmarks_]); + std::vector tmp_y(max_len_map_[num_landmarks_]); + tmp_nb_x[i] = tmp_x; + tmp_nb_y[i] = tmp_y; + } + for (unsigned int i = 0; i < num_landmarks_; ++i) { + for (unsigned int j = 0; j < max_len_map_[num_landmarks_]; ++j) { + unsigned int ri = + reverse_index1_map_[num_landmarks_] + [i * max_len_map_[num_landmarks_] + j]; + unsigned int rj = + reverse_index2_map_[num_landmarks_] + [i * max_len_map_[num_landmarks_] + j]; + tmp_nb_x[i][j] = lms_pred_nb_x[ri][rj]; + tmp_nb_y[i][j] = lms_pred_nb_y[ri][rj]; + } + } + + // merge predictions + result->Clear(); + for (unsigned int i = 0; i < num_landmarks_; ++i) { + float total_x = lms_pred_x[i]; + float total_y = lms_pred_y[i]; + for (unsigned int j = 0; j < max_len_map_[num_landmarks_]; ++j) { + total_x += tmp_nb_x[i][j]; + total_y += tmp_nb_y[i][j]; + } + float x = total_x / ((float)max_len_map_[num_landmarks_] + 1.f); + float y = total_y / ((float)max_len_map_[num_landmarks_] + 1.f); + x = std::min(std::max(0.f, x), 1.0f); + y = std::min(std::max(0.f, y), 1.0f); + result->landmarks.emplace_back( + std::array{x * img_width, y * img_height}); + } +}; + +void PIPNet::SetNumLandmarks(const int &num_landmarks) { + if (std::find(supported_num_landmarks_.begin(), + supported_num_landmarks_.end(), + num_landmarks) == supported_num_landmarks_.end()) { + FDWARNING << "The number of landmarks should be in {19, 29, 68, 98}." 
+ << std::endl; + } + num_landmarks_ = num_landmarks; +} +PIPNet::PIPNet(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool PIPNet::Initialize() { + // parameters for preprocess + size_ = {256, 256}; + mean_vals_ = {0.485f, 0.456f, 0.406f}; + std_vals_ = {0.229f, 0.224f, 0.225f}; + num_nb_ = 10; + net_stride_ = 32; + num_landmarks_ = 19; + supported_num_landmarks_ = {19, 29, 68, 98}; + // parameters for num_landmarks_ == 19 + reverse_index1_map_[19] = { + 1, 2, 6, 7, 8, 1, 2, 6, 7, 8, 1, 2, 6, 7, 8, 1, 2, 6, + 0, 2, 3, 4, 6, 7, 8, 0, 2, 3, 4, 6, 7, 8, 0, 2, 3, 4, + 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 3, 4, 5, 6, + 0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 14, 0, 1, 2, 4, 5, 6, + 1, 2, 3, 5, 9, 10, 11, 1, 2, 3, 5, 9, 10, 11, 1, 2, 3, 5, + 3, 4, 9, 10, 11, 3, 4, 9, 10, 11, 3, 4, 9, 10, 11, 3, 4, 9, + 0, 1, 2, 3, 7, 8, 12, 13, 15, 0, 1, 2, 3, 7, 8, 12, 13, 15, + 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 18, 0, 1, + 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 14, 16, 17, 18, 0, 1, + 3, 4, 5, 9, 10, 14, 17, 3, 4, 5, 9, 10, 14, 17, 3, 4, 5, 9, + 0, 1, 6, 7, 8, 13, 14, 15, 16, 17, 18, 0, 1, 6, 7, 8, 13, 14, + 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 0, 2, 5, + 4, 5, 9, 10, 11, 12, 13, 15, 16, 17, 18, 4, 5, 9, 10, 11, 12, 13, + 12, 13, 14, 16, 17, 18, 12, 13, 14, 16, 17, 18, 12, 13, 14, 16, 17, 18, + 12, 13, 14, 15, 17, 18, 12, 13, 14, 15, 17, 18, 12, 13, 14, 15, 17, 18, + 12, 13, 14, 15, 16, 18, 12, 13, 14, 15, 16, 18, 12, 13, 14, 15, 16, 18, + 15, 16, 17, 15, 16, 17, 15, 16, 17, 15, 16, 17, 15, 16, 17, 15, 16, 17}; + reverse_index2_map_[19] = { + 0, 6, 1, 4, 6, 0, 6, 1, 4, 6, 0, 6, 1, 4, 6, 0, 6, 1, 0, 1, 8, 7, 2, 2, 3, + 0, 1, 8, 7, 2, 2, 3, 0, 1, 8, 7, 3, 1, 3, 5, 5, 4, 3, 1, 5, 6, 6, 9, 3, 1, + 3, 5, 5, 4, 5, 5, 3, 1, 3, 7, 5, 5, 1, 3, 4, 9, 5, 5, 3, 1, 3, 7, 7, 8, 1, + 0, 3, 2, 2, 7, 8, 1, 0, 3, 2, 2, 7, 8, 1, 0, 6, 0, 6, 4, 1, 6, 0, 6, 4, 1, + 6, 0, 6, 4, 1, 6, 0, 6, 1, 3, 4, 9, 1, 2, 6, 9, 8, 1, 3, 4, 9, 1, 2, 6, 9, + 8, 2, 2, 2, 7, 8, 9, 0, 0, 9, 9, 9, 5, 7, 7, 8, 8, 2, 2, 4, 4, 0, 5, 6, 6, + 3, 0, 4, 5, 7, 4, 3, 8, 6, 6, 9, 6, 7, 6, 5, 0, 4, 4, 8, 6, 4, 0, 3, 8, 4, + 4, 9, 7, 6, 7, 9, 8, 7, 2, 2, 2, 9, 9, 9, 0, 0, 8, 5, 9, 7, 9, 9, 8, 4, 3, + 1, 2, 1, 6, 8, 4, 3, 1, 2, 1, 6, 8, 4, 3, 1, 2, 6, 9, 5, 7, 8, 0, 2, 1, 3, + 4, 4, 6, 9, 5, 7, 8, 0, 2, 8, 9, 8, 6, 8, 7, 7, 8, 8, 0, 0, 2, 2, 2, 5, 8, + 9, 8, 9, 7, 8, 7, 5, 2, 1, 4, 4, 1, 3, 9, 7, 8, 7, 5, 2, 1, 1, 5, 7, 0, 3, + 1, 1, 5, 7, 0, 3, 1, 1, 5, 7, 0, 3, 1, 3, 2, 3, 0, 0, 0, 3, 2, 3, 0, 0, 0, + 3, 2, 3, 0, 0, 0, 7, 6, 1, 3, 1, 2, 7, 6, 1, 3, 1, 2, 7, 6, 1, 3, 1, 2, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}; + max_len_map_[19] = 18; + // parameters for num_landmarks_ == 29 + reverse_index1_map_[29] = { + 2, 4, 5, 8, 12, 13, 16, 2, 4, 5, 8, 12, 13, 16, 2, 4, 5, 8, + 12, 3, 6, 7, 9, 14, 15, 17, 3, 6, 7, 9, 
14, 15, 17, 3, 6, 7, + 9, 14, 0, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 16, 0, 3, 4, + 5, 6, 7, 0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 17, 0, + 1, 2, 4, 5, 0, 2, 5, 8, 10, 12, 13, 16, 0, 2, 5, 8, 10, 12, + 13, 16, 0, 2, 5, 0, 2, 4, 8, 10, 12, 13, 16, 0, 2, 4, 8, 10, + 12, 13, 16, 0, 2, 4, 1, 3, 7, 9, 11, 14, 15, 17, 1, 3, 7, 9, + 11, 14, 15, 17, 1, 3, 7, 1, 3, 6, 9, 11, 14, 15, 17, 1, 3, 6, + 9, 11, 14, 15, 17, 1, 3, 6, 0, 2, 4, 5, 10, 12, 13, 16, 0, 2, + 4, 5, 10, 12, 13, 16, 0, 2, 4, 1, 3, 6, 7, 11, 14, 15, 17, 1, + 3, 6, 7, 11, 14, 15, 17, 1, 3, 6, 0, 2, 3, 4, 5, 8, 12, 13, + 16, 18, 20, 0, 2, 3, 4, 5, 8, 12, 13, 1, 2, 3, 6, 7, 9, 14, + 15, 17, 19, 20, 21, 1, 2, 3, 6, 7, 9, 14, 0, 2, 4, 5, 8, 10, + 13, 16, 0, 2, 4, 5, 8, 10, 13, 16, 0, 2, 4, 0, 2, 4, 5, 8, + 10, 12, 16, 18, 22, 0, 2, 4, 5, 8, 10, 12, 16, 18, 1, 3, 6, 7, + 9, 11, 15, 17, 1, 3, 6, 7, 9, 11, 15, 17, 1, 3, 6, 1, 3, 6, + 7, 9, 11, 14, 17, 19, 23, 1, 3, 6, 7, 9, 11, 14, 17, 19, 0, 2, + 4, 5, 8, 10, 12, 13, 18, 0, 2, 4, 5, 8, 10, 12, 13, 18, 0, 1, + 3, 6, 7, 9, 11, 14, 15, 19, 1, 3, 6, 7, 9, 11, 14, 15, 19, 1, + 0, 4, 5, 8, 10, 12, 13, 16, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 0, 1, 6, 7, 9, 11, 14, 15, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 1, 1, 8, 9, 10, 11, 13, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, + 26, 27, 28, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 18, 19, 20, 22, 23, + 24, 25, 26, 27, 18, 20, 21, 24, 25, 26, 27, 28, 18, 20, 21, 24, 25, 26, + 27, 28, 18, 20, 21, 19, 21, 24, 25, 26, 27, 28, 19, 21, 24, 25, 26, 27, + 28, 19, 21, 24, 25, 26, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 18, 19, + 20, 21, 22, 23, 25, 26, 27, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 18, + 19, 20, 21, 22, 23, 24, 26, 27, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, + 18, 19, 20, 21, 22, 23, 24, 25, 27, 20, 21, 22, 23, 24, 25, 26, 28, 20, + 21, 22, 23, 24, 25, 26, 28, 20, 21, 22, 22, 23, 24, 25, 26, 27, 22, 23, + 24, 25, 26, 27, 22, 23, 24, 25, 26, 27, 22}; + reverse_index2_map_[29] = { + 9, 3, 5, 3, 7, 7, 7, 9, 3, 5, 3, 7, 7, 7, 9, 3, 5, 3, 7, 9, 3, 5, 3, 7, + 7, 7, 9, 3, 5, 3, 7, 7, 7, 9, 3, 5, 3, 7, 7, 6, 6, 6, 8, 9, 7, 0, 9, 6, + 5, 9, 6, 7, 6, 6, 6, 8, 9, 9, 7, 6, 8, 9, 6, 6, 7, 8, 0, 9, 6, 6, 6, 9, + 7, 6, 8, 9, 2, 5, 0, 5, 5, 3, 6, 5, 2, 5, 0, 5, 5, 3, 6, 5, 2, 5, 0, 1, + 3, 0, 4, 4, 2, 4, 2, 1, 3, 0, 4, 4, 2, 4, 2, 1, 3, 0, 2, 4, 0, 5, 5, 3, + 5, 5, 2, 4, 0, 5, 5, 3, 5, 5, 2, 4, 0, 1, 3, 0, 4, 4, 2, 4, 2, 1, 3, 0, + 4, 4, 2, 4, 2, 1, 3, 0, 0, 7, 4, 3, 6, 5, 3, 4, 0, 7, 4, 3, 6, 5, 3, 4, + 0, 7, 4, 0, 7, 4, 3, 6, 5, 2, 4, 0, 7, 4, 3, 6, 5, 2, 4, 0, 7, 4, 6, 0, + 8, 7, 7, 6, 4, 2, 3, 5, 6, 6, 0, 8, 7, 7, 6, 4, 2, 6, 8, 0, 7, 7, 6, 4, + 3, 3, 5, 7, 9, 6, 8, 0, 7, 7, 6, 4, 3, 1, 1, 1, 2, 3, 1, 0, 3, 1, 1, 1, + 2, 3, 1, 0, 3, 1, 1, 5, 4, 5, 4, 0, 2, 1, 1, 6, 9, 5, 4, 5, 4, 0, 2, 1, + 1, 6, 3, 1, 1, 1, 2, 3, 1, 0, 3, 1, 1, 1, 2, 3, 1, 0, 3, 1, 1, 5, 5, 5, + 4, 0, 2, 1, 1, 7, 9, 5, 5, 5, 4, 0, 2, 1, 1, 7, 4, 2, 2, 2, 1, 1, 0, 0, + 9, 4, 2, 2, 2, 1, 1, 0, 0, 9, 4, 4, 2, 2, 2, 1, 1, 0, 0, 9, 4, 2, 2, 2, + 1, 1, 0, 0, 9, 4, 8, 9, 8, 8, 7, 8, 8, 8, 8, 1, 3, 0, 8, 5, 8, 9, 9, 9, + 8, 8, 9, 8, 8, 7, 8, 8, 8, 8, 2, 4, 8, 0, 6, 7, 8, 8, 7, 8, 9, 9, 9, 9, + 8, 9, 9, 9, 9, 0, 0, 0, 6, 6, 4, 4, 6, 7, 8, 1, 1, 0, 5, 5, 2, 3, 3, 4, + 6, 1, 1, 0, 5, 5, 2, 3, 3, 4, 2, 8, 7, 7, 5, 4, 6, 5, 2, 8, 7, 7, 5, 4, + 6, 5, 2, 8, 7, 2, 8, 8, 6, 5, 5, 4, 2, 8, 8, 6, 5, 5, 4, 2, 8, 8, 6, 5, + 3, 3, 3, 1, 2, 3, 0, 2, 2, 3, 3, 3, 3, 1, 2, 3, 0, 2, 2, 4, 4, 4, 2, 1, + 1, 0, 0, 1, 2, 4, 4, 4, 2, 1, 1, 0, 0, 1, 7, 6, 5, 5, 3, 2, 1, 1, 0, 1, + 7, 6, 5, 5, 3, 2, 1, 1, 0, 
9, 6, 4, 4, 3, 2, 1, 0, 9, 6, 4, 4, 3, 2, 1, + 0, 9, 6, 4, 7, 7, 9, 9, 7, 3, 7, 7, 9, 9, 7, 3, 7, 7, 9, 9, 7, 3, 7}; + max_len_map_[29] = 19; + // parameters for num_landmarks_ == 68 + reverse_index1_map_[68] = { + 1, 2, 17, 18, 36, 1, 2, 17, 18, 36, 1, 2, 17, 18, 36, 1, 2, 17, + 18, 36, 1, 2, 0, 2, 3, 17, 0, 2, 3, 17, 0, 2, 3, 17, 0, 2, + 3, 17, 0, 2, 3, 17, 0, 2, 0, 1, 3, 4, 0, 1, 3, 4, 0, 1, + 3, 4, 0, 1, 3, 4, 0, 1, 3, 4, 0, 1, 1, 2, 4, 5, 1, 2, + 4, 5, 1, 2, 4, 5, 1, 2, 4, 5, 1, 2, 4, 5, 1, 2, 2, 3, + 5, 6, 2, 3, 5, 6, 2, 3, 5, 6, 2, 3, 5, 6, 2, 3, 5, 6, + 2, 3, 3, 4, 6, 7, 3, 4, 6, 7, 3, 4, 6, 7, 3, 4, 6, 7, + 3, 4, 6, 7, 3, 4, 3, 4, 5, 7, 8, 3, 4, 5, 7, 8, 3, 4, + 5, 7, 8, 3, 4, 5, 7, 8, 3, 4, 5, 6, 8, 9, 5, 6, 8, 9, + 5, 6, 8, 9, 5, 6, 8, 9, 5, 6, 8, 9, 5, 6, 6, 7, 9, 10, + 6, 7, 9, 10, 6, 7, 9, 10, 6, 7, 9, 10, 6, 7, 9, 10, 6, 7, + 7, 8, 10, 11, 7, 8, 10, 11, 7, 8, 10, 11, 7, 8, 10, 11, 7, 8, + 10, 11, 7, 8, 8, 9, 11, 12, 13, 8, 9, 11, 12, 13, 8, 9, 11, 12, + 13, 8, 9, 11, 12, 13, 8, 9, 9, 10, 12, 13, 9, 10, 12, 13, 9, 10, + 12, 13, 9, 10, 12, 13, 9, 10, 12, 13, 9, 10, 10, 11, 13, 14, 10, 11, + 13, 14, 10, 11, 13, 14, 10, 11, 13, 14, 10, 11, 13, 14, 10, 11, 11, 12, + 14, 15, 11, 12, 14, 15, 11, 12, 14, 15, 11, 12, 14, 15, 11, 12, 14, 15, + 11, 12, 12, 13, 15, 16, 12, 13, 15, 16, 12, 13, 15, 16, 12, 13, 15, 16, + 12, 13, 15, 16, 12, 13, 13, 14, 16, 26, 13, 14, 16, 26, 13, 14, 16, 26, + 13, 14, 16, 26, 13, 14, 16, 26, 13, 14, 14, 15, 25, 26, 45, 14, 15, 25, + 26, 45, 14, 15, 25, 26, 45, 14, 15, 25, 26, 45, 14, 15, 0, 1, 2, 18, + 19, 36, 37, 41, 0, 1, 2, 18, 19, 36, 37, 41, 0, 1, 2, 18, 19, 36, + 0, 1, 17, 19, 20, 36, 37, 38, 41, 0, 1, 17, 19, 20, 36, 37, 38, 41, + 0, 1, 17, 19, 0, 17, 18, 20, 21, 36, 37, 38, 40, 41, 0, 17, 18, 20, + 21, 36, 37, 38, 40, 41, 0, 17, 17, 18, 19, 21, 36, 37, 38, 39, 40, 41, + 17, 18, 19, 21, 36, 37, 38, 39, 40, 41, 17, 18, 18, 19, 20, 22, 27, 28, + 37, 38, 39, 40, 41, 18, 19, 20, 22, 27, 28, 37, 38, 39, 40, 41, 21, 23, + 24, 25, 27, 28, 42, 43, 44, 46, 47, 21, 23, 24, 25, 27, 28, 42, 43, 44, + 46, 47, 22, 24, 25, 26, 42, 43, 44, 45, 46, 47, 22, 24, 25, 26, 42, 43, + 44, 45, 46, 47, 22, 24, 16, 22, 23, 25, 26, 43, 44, 45, 46, 47, 16, 22, + 23, 25, 26, 43, 44, 45, 46, 47, 16, 22, 15, 16, 23, 24, 26, 43, 44, 45, + 46, 15, 16, 23, 24, 26, 43, 44, 45, 46, 15, 16, 23, 24, 14, 15, 16, 24, + 25, 44, 45, 46, 14, 15, 16, 24, 25, 44, 45, 46, 14, 15, 16, 24, 25, 44, + 20, 21, 22, 23, 28, 29, 38, 39, 40, 42, 43, 47, 20, 21, 22, 23, 28, 29, + 38, 39, 40, 42, 21, 22, 27, 29, 30, 39, 40, 42, 47, 21, 22, 27, 29, 30, + 39, 40, 42, 47, 21, 22, 27, 29, 27, 28, 30, 31, 35, 39, 42, 27, 28, 30, + 31, 35, 39, 42, 27, 28, 30, 31, 35, 39, 42, 27, 28, 29, 31, 32, 33, 34, + 35, 28, 29, 31, 32, 33, 34, 35, 28, 29, 31, 32, 33, 34, 35, 28, 2, 3, + 29, 30, 32, 33, 48, 49, 2, 3, 29, 30, 32, 33, 48, 49, 2, 3, 29, 30, + 32, 33, 29, 30, 31, 33, 34, 35, 49, 50, 29, 30, 31, 33, 34, 35, 49, 50, + 29, 30, 31, 33, 34, 35, 29, 30, 31, 32, 34, 35, 50, 51, 52, 29, 30, 31, + 32, 34, 35, 50, 51, 52, 29, 30, 31, 32, 29, 30, 31, 32, 33, 35, 52, 53, + 29, 30, 31, 32, 33, 35, 52, 53, 29, 30, 31, 32, 33, 35, 13, 14, 29, 30, + 32, 33, 34, 53, 54, 13, 14, 29, 30, 32, 33, 34, 53, 54, 13, 14, 29, 30, + 0, 1, 2, 17, 18, 19, 20, 37, 38, 39, 40, 41, 0, 1, 2, 17, 18, 19, + 20, 37, 38, 39, 0, 1, 17, 18, 19, 20, 21, 36, 38, 39, 40, 41, 0, 1, + 17, 18, 19, 20, 21, 36, 38, 39, 0, 1, 17, 18, 19, 20, 21, 27, 28, 36, + 37, 39, 40, 41, 0, 1, 17, 18, 19, 20, 21, 27, 19, 20, 21, 27, 28, 29, + 36, 37, 38, 40, 41, 19, 20, 
21, 27, 28, 29, 36, 37, 38, 40, 41, 0, 1, + 17, 18, 19, 20, 21, 27, 28, 36, 37, 38, 39, 41, 0, 1, 17, 18, 19, 20, + 21, 27, 0, 1, 2, 17, 18, 19, 20, 21, 36, 37, 38, 39, 40, 0, 1, 2, + 17, 18, 19, 20, 21, 36, 22, 23, 24, 27, 28, 29, 43, 44, 45, 46, 47, 22, + 23, 24, 27, 28, 29, 43, 44, 45, 46, 47, 15, 16, 22, 23, 24, 25, 26, 27, + 42, 44, 45, 46, 47, 15, 16, 22, 23, 24, 25, 26, 27, 42, 15, 16, 22, 23, + 24, 25, 26, 42, 43, 45, 46, 47, 15, 16, 22, 23, 24, 25, 26, 42, 43, 45, + 14, 15, 16, 23, 24, 25, 26, 42, 43, 44, 46, 47, 14, 15, 16, 23, 24, 25, + 26, 42, 43, 44, 14, 15, 16, 22, 23, 24, 25, 26, 42, 43, 44, 45, 47, 14, + 15, 16, 22, 23, 24, 25, 26, 42, 15, 16, 22, 23, 24, 25, 26, 27, 28, 42, + 43, 44, 45, 46, 15, 16, 22, 23, 24, 25, 26, 27, 2, 3, 4, 5, 6, 49, + 59, 60, 2, 3, 4, 5, 6, 49, 59, 60, 2, 3, 4, 5, 6, 49, 3, 4, + 5, 31, 32, 48, 50, 51, 59, 60, 61, 67, 3, 4, 5, 31, 32, 48, 50, 51, + 59, 60, 30, 31, 32, 33, 34, 48, 49, 51, 52, 58, 59, 60, 61, 62, 66, 67, + 30, 31, 32, 33, 34, 48, 30, 31, 32, 33, 34, 35, 48, 49, 50, 52, 53, 54, + 56, 58, 60, 61, 62, 63, 64, 65, 66, 67, 30, 32, 33, 34, 35, 50, 51, 53, + 54, 55, 56, 62, 63, 64, 65, 30, 32, 33, 34, 35, 50, 51, 11, 12, 13, 34, + 35, 52, 54, 55, 63, 64, 65, 11, 12, 13, 34, 35, 52, 54, 55, 63, 64, 65, + 10, 11, 12, 13, 14, 53, 55, 64, 10, 11, 12, 13, 14, 53, 55, 64, 10, 11, + 12, 13, 14, 53, 8, 9, 10, 11, 12, 13, 53, 54, 56, 57, 63, 64, 65, 8, + 9, 10, 11, 12, 13, 53, 54, 56, 7, 8, 9, 10, 11, 12, 54, 55, 57, 58, + 63, 64, 65, 66, 7, 8, 9, 10, 11, 12, 54, 55, 6, 7, 8, 9, 10, 55, + 56, 58, 59, 62, 65, 66, 67, 6, 7, 8, 9, 10, 55, 56, 58, 59, 4, 5, + 6, 7, 8, 9, 48, 56, 57, 59, 60, 61, 62, 66, 67, 4, 5, 6, 7, 8, + 9, 48, 3, 4, 5, 6, 7, 8, 48, 49, 57, 58, 60, 61, 67, 3, 4, 5, + 6, 7, 8, 48, 49, 57, 2, 3, 4, 5, 6, 31, 48, 49, 59, 2, 3, 4, + 5, 6, 31, 48, 49, 59, 2, 3, 4, 5, 31, 32, 33, 48, 49, 50, 51, 52, + 57, 58, 59, 60, 62, 63, 66, 67, 31, 32, 33, 48, 49, 50, 33, 34, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 63, 64, 65, 66, 67, 33, + 34, 35, 50, 51, 52, 53, 54, 55, 56, 57, 61, 62, 64, 65, 66, 34, 35, 50, + 51, 52, 53, 54, 10, 11, 12, 13, 14, 35, 53, 54, 55, 10, 11, 12, 13, 14, + 35, 53, 54, 55, 10, 11, 12, 13, 9, 10, 11, 12, 51, 52, 53, 54, 55, 56, + 57, 58, 61, 62, 63, 64, 66, 67, 9, 10, 11, 12, 7, 8, 9, 50, 51, 52, + 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 7, 8, 9, 50, 4, 5, + 6, 7, 48, 49, 50, 51, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 4, 5, + 6, 7}; + reverse_index2_map_[68] = { + 0, 3, 1, 7, 8, 0, 3, 1, 7, 8, 0, 3, 1, 7, 8, 0, 3, 1, 7, 8, 0, 3, 1, 1, 4, + 9, 1, 1, 4, 9, 1, 1, 4, 9, 1, 1, 4, 9, 1, 1, 4, 9, 1, 1, 6, 1, 1, 5, 6, 1, + 1, 5, 6, 1, 1, 5, 6, 1, 1, 5, 6, 1, 1, 5, 6, 1, 5, 0, 0, 6, 5, 0, 0, 6, 5, + 0, 0, 6, 5, 0, 0, 6, 5, 0, 0, 6, 5, 0, 2, 0, 1, 7, 2, 0, 1, 7, 2, 0, 1, 7, + 2, 0, 1, 7, 2, 0, 1, 7, 2, 0, 2, 1, 1, 6, 2, 1, 1, 6, 2, 1, 1, 6, 2, 1, 1, + 6, 2, 1, 1, 6, 2, 1, 9, 4, 0, 1, 4, 9, 4, 0, 1, 4, 9, 4, 0, 1, 4, 9, 4, 0, + 1, 4, 9, 4, 5, 0, 1, 3, 5, 0, 1, 3, 5, 0, 1, 3, 5, 0, 1, 3, 5, 0, 1, 3, 5, + 0, 4, 0, 0, 4, 4, 0, 0, 4, 4, 0, 0, 4, 4, 0, 0, 4, 4, 0, 0, 4, 4, 0, 3, 0, + 0, 5, 3, 0, 0, 5, 3, 0, 0, 5, 3, 0, 0, 5, 3, 0, 0, 5, 3, 0, 3, 1, 0, 4, 9, + 3, 1, 0, 4, 9, 3, 1, 0, 4, 9, 3, 1, 0, 4, 9, 3, 1, 6, 1, 0, 2, 6, 1, 0, 2, + 6, 1, 0, 2, 6, 1, 0, 2, 6, 1, 0, 2, 6, 1, 7, 1, 0, 2, 7, 1, 0, 2, 7, 1, 0, + 2, 7, 1, 0, 2, 7, 1, 0, 2, 7, 1, 6, 1, 1, 4, 6, 1, 1, 4, 6, 1, 1, 4, 6, 1, + 1, 4, 6, 1, 1, 4, 6, 1, 5, 1, 0, 6, 5, 1, 0, 6, 5, 1, 0, 6, 5, 1, 0, 6, 5, + 1, 0, 6, 5, 1, 3, 0, 0, 9, 3, 0, 0, 9, 3, 0, 0, 9, 3, 
0, 0, 9, 3, 0, 0, 9, + 3, 0, 3, 1, 7, 2, 8, 3, 1, 7, 2, 8, 3, 1, 7, 2, 8, 3, 1, 7, 2, 8, 3, 1, 0, + 3, 9, 0, 4, 4, 8, 6, 0, 3, 9, 0, 4, 4, 8, 6, 0, 3, 9, 0, 4, 4, 3, 8, 0, 0, + 6, 5, 7, 9, 7, 3, 8, 0, 0, 6, 5, 7, 9, 7, 3, 8, 0, 0, 7, 4, 1, 1, 6, 6, 5, + 7, 9, 5, 7, 4, 1, 1, 6, 6, 5, 7, 9, 5, 7, 4, 8, 4, 1, 0, 9, 6, 4, 7, 6, 8, + 8, 4, 1, 0, 9, 6, 4, 7, 6, 8, 8, 4, 9, 6, 0, 4, 2, 7, 9, 6, 5, 5, 9, 9, 6, + 0, 4, 2, 7, 9, 6, 5, 5, 9, 4, 1, 6, 9, 3, 8, 5, 6, 9, 9, 6, 4, 1, 6, 9, 3, + 8, 5, 6, 9, 9, 6, 0, 1, 4, 8, 7, 5, 7, 9, 8, 5, 0, 1, 4, 8, 7, 5, 7, 9, 8, + 5, 0, 1, 7, 6, 0, 1, 4, 7, 5, 6, 6, 9, 7, 6, 0, 1, 4, 7, 5, 6, 6, 9, 7, 6, + 8, 3, 5, 0, 0, 9, 6, 5, 7, 8, 3, 5, 0, 0, 9, 6, 5, 7, 8, 3, 5, 0, 8, 3, 1, + 4, 0, 8, 4, 5, 8, 3, 1, 4, 0, 8, 4, 5, 8, 3, 1, 4, 0, 8, 9, 1, 1, 9, 1, 2, + 8, 4, 7, 2, 8, 7, 9, 1, 1, 9, 1, 2, 8, 4, 7, 2, 8, 8, 0, 0, 6, 6, 8, 6, 8, + 8, 8, 0, 0, 6, 6, 8, 6, 8, 8, 8, 0, 0, 5, 0, 0, 9, 9, 9, 9, 5, 0, 0, 9, 9, + 9, 9, 5, 0, 0, 9, 9, 9, 9, 5, 4, 1, 2, 2, 2, 2, 2, 4, 1, 2, 2, 2, 2, 2, 4, + 1, 2, 2, 2, 2, 2, 4, 8, 8, 6, 5, 0, 7, 7, 9, 8, 8, 6, 5, 0, 7, 7, 9, 8, 8, + 6, 5, 0, 7, 4, 3, 0, 0, 4, 5, 8, 7, 4, 3, 0, 0, 4, 5, 8, 7, 4, 3, 0, 0, 4, + 5, 7, 2, 1, 1, 1, 1, 5, 8, 5, 7, 2, 1, 1, 1, 1, 5, 8, 5, 7, 2, 1, 1, 3, 1, + 5, 4, 1, 0, 6, 9, 3, 1, 5, 4, 1, 0, 6, 9, 3, 1, 5, 4, 1, 0, 8, 9, 5, 4, 9, + 6, 0, 8, 7, 8, 9, 5, 4, 9, 6, 0, 8, 7, 8, 9, 5, 4, 2, 2, 4, 2, 3, 5, 8, 1, + 5, 8, 4, 1, 2, 2, 4, 2, 3, 5, 8, 1, 5, 8, 5, 6, 3, 2, 2, 3, 7, 1, 1, 3, 3, + 0, 5, 6, 3, 2, 2, 3, 7, 1, 1, 3, 9, 9, 6, 6, 3, 2, 2, 7, 9, 3, 2, 1, 0, 3, + 9, 9, 6, 6, 3, 2, 2, 7, 9, 4, 3, 4, 3, 9, 7, 4, 2, 1, 4, 9, 4, 3, 4, 3, 9, + 7, 4, 2, 1, 4, 8, 7, 7, 8, 8, 5, 5, 8, 5, 2, 3, 0, 0, 2, 8, 7, 7, 8, 8, 5, + 5, 8, 4, 4, 5, 5, 5, 7, 7, 9, 0, 0, 3, 2, 2, 4, 4, 5, 5, 5, 7, 7, 9, 0, 3, + 4, 9, 1, 2, 8, 2, 4, 7, 4, 2, 3, 4, 9, 1, 2, 8, 2, 4, 7, 4, 2, 9, 9, 2, 2, + 3, 6, 6, 6, 1, 2, 3, 3, 0, 9, 9, 2, 2, 3, 6, 6, 6, 1, 6, 5, 7, 3, 2, 2, 3, + 4, 1, 1, 1, 3, 6, 5, 7, 3, 2, 2, 3, 4, 1, 1, 4, 2, 2, 8, 5, 3, 1, 8, 4, 1, + 0, 4, 4, 2, 2, 8, 5, 3, 1, 8, 4, 1, 5, 5, 4, 9, 7, 7, 5, 5, 3, 3, 0, 0, 1, + 5, 5, 4, 9, 7, 7, 5, 5, 3, 7, 8, 5, 6, 8, 8, 7, 9, 6, 0, 0, 3, 2, 2, 7, 8, + 5, 6, 8, 8, 7, 9, 6, 3, 2, 2, 5, 3, 3, 0, 6, 3, 2, 2, 5, 3, 3, 0, 6, 3, 2, + 2, 5, 3, 6, 7, 8, 4, 6, 1, 3, 9, 4, 1, 5, 8, 6, 7, 8, 4, 6, 1, 3, 9, 4, 1, + 7, 3, 3, 4, 8, 5, 1, 1, 7, 9, 8, 5, 1, 6, 9, 5, 7, 3, 3, 4, 8, 5, 9, 6, 5, + 3, 5, 6, 9, 6, 1, 1, 6, 9, 8, 8, 8, 3, 0, 3, 8, 6, 6, 6, 8, 8, 5, 3, 3, 8, + 2, 1, 5, 8, 9, 7, 1, 5, 4, 8, 8, 5, 3, 3, 8, 2, 8, 7, 6, 6, 4, 3, 1, 3, 5, + 1, 8, 8, 7, 6, 6, 4, 3, 1, 3, 5, 1, 8, 5, 2, 2, 4, 6, 2, 4, 0, 5, 2, 2, 4, + 6, 2, 4, 0, 5, 2, 2, 4, 6, 2, 7, 5, 2, 3, 6, 7, 5, 2, 2, 9, 8, 2, 5, 7, 5, + 2, 3, 6, 7, 5, 2, 2, 7, 5, 2, 3, 7, 8, 6, 0, 1, 5, 7, 6, 3, 8, 7, 5, 2, 3, + 7, 8, 6, 0, 8, 4, 2, 4, 8, 7, 0, 0, 7, 8, 7, 4, 7, 8, 4, 2, 4, 8, 7, 0, 0, + 7, 9, 7, 3, 2, 6, 7, 6, 5, 0, 0, 6, 7, 9, 7, 3, 9, 7, 3, 2, 6, 7, 6, 7, 6, + 3, 2, 5, 8, 2, 5, 8, 2, 2, 8, 4, 7, 6, 3, 2, 5, 8, 2, 5, 8, 7, 5, 3, 4, 6, + 8, 0, 0, 1, 7, 5, 3, 4, 6, 8, 0, 0, 1, 7, 5, 3, 4, 7, 7, 9, 3, 2, 0, 3, 9, + 6, 4, 5, 3, 2, 6, 3, 0, 7, 7, 9, 3, 2, 0, 8, 9, 8, 7, 2, 0, 2, 7, 8, 9, 6, + 5, 6, 9, 7, 2, 2, 7, 2, 0, 2, 8, 7, 7, 9, 4, 0, 3, 3, 5, 4, 7, 6, 3, 3, 0, + 5, 7, 7, 9, 4, 0, 3, 3, 6, 4, 3, 5, 7, 8, 0, 0, 1, 6, 4, 3, 5, 7, 8, 0, 0, + 1, 6, 4, 3, 5, 8, 9, 9, 9, 7, 4, 4, 4, 2, 1, 4, 7, 9, 5, 0, 4, 2, 9, 8, 9, + 9, 9, 9, 9, 9, 6, 5, 8, 6, 3, 2, 3, 6, 9, 4, 1, 4, 9, 1, 1, 9, 9, 9, 6, 8, + 9, 9, 8, 4, 4, 4, 6, 7, 3, 1, 2, 4, 0, 4, 9, 9, 1, 8, 9, 9, 8}; + 
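+  // Note added for review (a sketch of intent, assuming the standard PIPNet
+  // neighbour-voting scheme rather than quoting the upstream PIPNet code):
+  // reverse_index1_map_[n] records, for each of the n landmarks, which other
+  // landmarks keep it among their num_nb_ mean-face neighbours, and
+  // reverse_index2_map_[n] gives the slot of that landmark inside the
+  // neighbour's offset outputs. Entries are repeated so that every landmark
+  // receives exactly max_len_map_[n] votes, which the GenerateLandmarks()
+  // helper declared in pipnet.h presumably averages together with each
+  // landmark's own prediction.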
max_len_map_[68] = 22; + // parameters for num_landmarks_ == 98 + reverse_index1_map_[98] = { + 1, 2, 3, 4, 5, 33, 1, 2, 3, 4, 5, 33, 1, 2, 3, 4, 5, 0, + 2, 3, 4, 5, 6, 33, 0, 2, 3, 4, 5, 6, 33, 0, 2, 3, 0, 1, + 3, 4, 5, 6, 0, 1, 3, 4, 5, 6, 0, 1, 3, 4, 5, 0, 1, 2, + 4, 5, 6, 7, 0, 1, 2, 4, 5, 6, 7, 0, 1, 2, 0, 1, 2, 3, + 5, 6, 7, 8, 0, 1, 2, 3, 5, 6, 7, 8, 0, 1, 2, 3, 4, 6, + 7, 8, 9, 1, 2, 3, 4, 6, 7, 8, 9, 1, 2, 3, 4, 5, 7, 8, + 9, 10, 2, 3, 4, 5, 7, 8, 9, 10, 2, 3, 4, 5, 6, 8, 9, 10, + 3, 4, 5, 6, 8, 9, 10, 3, 4, 5, 4, 5, 6, 7, 9, 10, 11, 4, + 5, 6, 7, 9, 10, 11, 4, 5, 6, 4, 5, 6, 7, 8, 10, 11, 12, 4, + 5, 6, 7, 8, 10, 11, 12, 4, 5, 6, 7, 8, 9, 11, 12, 13, 76, 5, + 6, 7, 8, 9, 11, 12, 13, 7, 8, 9, 10, 12, 13, 14, 76, 88, 7, 8, + 9, 10, 12, 13, 14, 76, 8, 9, 10, 11, 13, 14, 15, 8, 9, 10, 11, 13, + 14, 15, 8, 9, 10, 10, 11, 12, 14, 15, 16, 10, 11, 12, 14, 15, 16, 10, + 11, 12, 14, 15, 11, 12, 13, 15, 16, 17, 11, 12, 13, 15, 16, 17, 11, 12, + 13, 15, 16, 12, 13, 14, 16, 17, 18, 12, 13, 14, 16, 17, 18, 12, 13, 14, + 16, 17, 13, 14, 15, 17, 18, 19, 13, 14, 15, 17, 18, 19, 13, 14, 15, 17, + 18, 14, 15, 16, 18, 19, 20, 14, 15, 16, 18, 19, 20, 14, 15, 16, 18, 19, + 15, 16, 17, 19, 20, 21, 15, 16, 17, 19, 20, 21, 15, 16, 17, 19, 20, 16, + 17, 18, 20, 21, 22, 16, 17, 18, 20, 21, 22, 16, 17, 18, 20, 21, 17, 18, + 19, 21, 22, 23, 24, 17, 18, 19, 21, 22, 23, 24, 17, 18, 19, 18, 19, 20, + 22, 23, 24, 25, 82, 18, 19, 20, 22, 23, 24, 25, 82, 18, 19, 20, 21, 23, + 24, 25, 26, 27, 19, 20, 21, 23, 24, 25, 26, 27, 19, 20, 21, 22, 24, 25, + 26, 27, 28, 20, 21, 22, 24, 25, 26, 27, 28, 20, 21, 22, 23, 25, 26, 27, + 28, 21, 22, 23, 25, 26, 27, 28, 21, 22, 23, 21, 22, 23, 24, 26, 27, 28, + 29, 21, 22, 23, 24, 26, 27, 28, 29, 21, 22, 23, 24, 25, 27, 28, 29, 30, + 22, 23, 24, 25, 27, 28, 29, 30, 22, 23, 24, 25, 26, 28, 29, 30, 31, 23, + 24, 25, 26, 28, 29, 30, 31, 23, 24, 25, 26, 27, 29, 30, 31, 32, 24, 25, + 26, 27, 29, 30, 31, 32, 24, 25, 26, 27, 28, 30, 31, 32, 25, 26, 27, 28, + 30, 31, 32, 25, 26, 27, 26, 27, 28, 29, 31, 32, 26, 27, 28, 29, 31, 32, + 26, 27, 28, 29, 31, 26, 27, 28, 29, 30, 32, 46, 26, 27, 28, 29, 30, 32, + 46, 26, 27, 28, 27, 28, 29, 30, 31, 46, 27, 28, 29, 30, 31, 46, 27, 28, + 29, 30, 31, 0, 1, 2, 3, 34, 41, 60, 0, 1, 2, 3, 34, 41, 60, 0, + 1, 2, 0, 33, 35, 40, 41, 60, 0, 33, 35, 40, 41, 60, 0, 33, 35, 40, + 41, 33, 34, 36, 37, 39, 40, 41, 60, 61, 62, 33, 34, 36, 37, 39, 40, 41, + 34, 35, 37, 38, 39, 40, 63, 64, 34, 35, 37, 38, 39, 40, 63, 64, 34, 36, + 38, 39, 51, 64, 36, 38, 39, 51, 64, 36, 38, 39, 51, 64, 36, 38, 36, 37, + 39, 51, 52, 63, 64, 65, 36, 37, 39, 51, 52, 63, 64, 65, 36, 35, 36, 37, + 38, 40, 62, 63, 64, 65, 66, 67, 96, 35, 36, 37, 38, 40, 33, 34, 35, 36, + 37, 38, 39, 41, 60, 61, 62, 63, 65, 66, 67, 96, 33, 0, 1, 2, 33, 34, + 35, 40, 60, 61, 67, 0, 1, 2, 33, 34, 35, 40, 43, 49, 50, 51, 68, 43, + 49, 50, 51, 68, 43, 49, 50, 51, 68, 43, 49, 42, 44, 45, 48, 49, 50, 68, + 69, 42, 44, 45, 48, 49, 50, 68, 69, 42, 42, 43, 45, 46, 47, 48, 49, 70, + 42, 43, 45, 46, 47, 48, 49, 70, 42, 32, 44, 46, 47, 48, 71, 72, 73, 32, + 44, 46, 47, 48, 71, 72, 73, 32, 29, 30, 31, 32, 45, 47, 72, 29, 30, 31, + 32, 45, 47, 72, 29, 30, 31, 30, 31, 32, 44, 45, 46, 48, 71, 72, 73, 30, + 31, 32, 44, 45, 46, 48, 42, 43, 44, 45, 46, 47, 49, 50, 69, 70, 71, 72, + 73, 74, 75, 97, 42, 42, 43, 44, 48, 50, 68, 69, 70, 74, 75, 97, 42, 43, + 44, 48, 50, 68, 42, 43, 49, 51, 52, 68, 69, 75, 42, 43, 49, 51, 52, 68, + 69, 75, 42, 37, 38, 42, 50, 52, 53, 64, 68, 37, 38, 42, 50, 52, 53, 64, + 68, 37, 51, 53, 54, 51, 
53, 54, 51, 53, 54, 51, 53, 54, 51, 53, 54, 51, + 53, 51, 52, 54, 55, 56, 57, 59, 51, 52, 54, 55, 56, 57, 59, 51, 52, 54, + 52, 53, 55, 56, 57, 58, 59, 52, 53, 55, 56, 57, 58, 59, 52, 53, 55, 53, + 54, 56, 57, 76, 77, 78, 88, 53, 54, 56, 57, 76, 77, 78, 88, 53, 53, 54, + 55, 57, 58, 77, 78, 79, 88, 53, 54, 55, 57, 58, 77, 78, 79, 53, 54, 55, + 56, 58, 59, 78, 79, 80, 90, 53, 54, 55, 56, 58, 59, 78, 53, 54, 56, 57, + 59, 79, 80, 81, 82, 92, 53, 54, 56, 57, 59, 79, 80, 53, 54, 57, 58, 80, + 81, 82, 92, 53, 54, 57, 58, 80, 81, 82, 92, 53, 0, 1, 2, 3, 4, 33, + 34, 41, 61, 62, 66, 67, 96, 0, 1, 2, 3, 0, 1, 33, 34, 35, 40, 41, + 60, 62, 63, 65, 66, 67, 96, 0, 1, 33, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 60, 61, 63, 64, 65, 66, 67, 96, 35, 36, 37, 38, 39, 40, 51, 52, 61, + 62, 64, 65, 66, 67, 96, 35, 36, 36, 37, 38, 39, 51, 52, 53, 63, 65, 66, + 96, 36, 37, 38, 39, 51, 52, 36, 37, 38, 39, 52, 61, 62, 63, 64, 66, 67, + 96, 36, 37, 38, 39, 52, 41, 60, 61, 62, 63, 64, 65, 67, 96, 41, 60, 61, + 62, 63, 64, 65, 67, 0, 1, 2, 3, 33, 34, 35, 40, 41, 60, 61, 62, 65, + 66, 96, 0, 1, 42, 43, 49, 50, 51, 52, 53, 69, 74, 75, 97, 42, 43, 49, + 50, 51, 52, 42, 43, 44, 48, 49, 50, 51, 68, 70, 71, 73, 74, 75, 97, 42, + 43, 44, 42, 43, 44, 45, 46, 47, 48, 49, 50, 68, 69, 71, 72, 73, 74, 75, + 97, 31, 32, 44, 45, 46, 47, 48, 69, 70, 72, 73, 74, 75, 97, 31, 32, 44, + 28, 29, 30, 31, 32, 45, 46, 47, 70, 71, 73, 74, 97, 28, 29, 30, 31, 29, + 30, 31, 32, 44, 45, 46, 47, 48, 70, 71, 72, 74, 75, 97, 29, 30, 47, 68, + 69, 70, 71, 72, 73, 75, 97, 47, 68, 69, 70, 71, 72, 73, 75, 42, 43, 49, + 50, 52, 68, 69, 70, 71, 72, 73, 74, 97, 42, 43, 49, 50, 6, 7, 8, 9, + 10, 11, 12, 55, 77, 87, 88, 89, 95, 6, 7, 8, 9, 55, 56, 76, 78, 86, + 87, 88, 89, 95, 55, 56, 76, 78, 86, 87, 88, 89, 54, 55, 56, 57, 58, 76, + 77, 79, 80, 85, 86, 87, 88, 89, 90, 94, 95, 54, 55, 56, 57, 58, 59, 77, + 78, 80, 81, 84, 85, 86, 89, 90, 91, 94, 54, 57, 58, 59, 78, 79, 81, 82, + 83, 84, 85, 90, 91, 92, 93, 94, 54, 58, 59, 80, 82, 83, 84, 91, 92, 93, + 58, 59, 80, 82, 83, 84, 91, 92, 20, 21, 22, 23, 24, 25, 26, 59, 81, 83, + 91, 92, 93, 20, 21, 22, 23, 17, 18, 19, 20, 21, 22, 23, 81, 82, 84, 91, + 92, 93, 17, 18, 19, 20, 16, 17, 18, 19, 20, 81, 82, 83, 85, 91, 92, 93, + 94, 16, 17, 18, 19, 14, 15, 16, 17, 18, 83, 84, 86, 87, 90, 93, 94, 95, + 14, 15, 16, 17, 11, 12, 13, 14, 15, 16, 76, 77, 85, 87, 88, 89, 94, 95, + 11, 12, 13, 9, 10, 11, 12, 13, 14, 76, 77, 86, 88, 89, 95, 9, 10, 11, + 12, 13, 7, 8, 9, 10, 11, 12, 13, 55, 76, 77, 86, 87, 89, 95, 7, 8, + 9, 55, 56, 76, 77, 78, 79, 86, 87, 88, 90, 95, 55, 56, 76, 77, 78, 79, + 56, 57, 58, 78, 79, 80, 83, 84, 85, 86, 87, 89, 91, 92, 93, 94, 95, 58, + 59, 79, 80, 81, 82, 83, 84, 85, 90, 92, 93, 94, 58, 59, 79, 80, 19, 20, + 21, 22, 23, 24, 25, 59, 81, 82, 83, 84, 91, 93, 19, 20, 21, 18, 19, 79, + 80, 81, 82, 83, 84, 85, 90, 91, 92, 94, 18, 19, 79, 80, 15, 16, 17, 78, + 79, 80, 83, 84, 85, 86, 87, 89, 90, 91, 93, 95, 15, 13, 14, 15, 76, 77, + 78, 85, 86, 87, 88, 89, 90, 94, 13, 14, 15, 76, 34, 35, 36, 38, 39, 40, + 41, 60, 61, 62, 63, 64, 65, 66, 67, 34, 35, 43, 44, 45, 47, 48, 49, 50, + 68, 69, 70, 71, 72, 73, 74, 75, 43, 44}; + reverse_index2_map_[98] = { + 0, 2, 4, 6, 8, 4, 0, 2, 4, 6, 8, 4, 0, 2, 4, 6, 8, 0, 0, 2, 4, 6, 8, 8, 0, + 0, 2, 4, 6, 8, 8, 0, 0, 2, 1, 1, 0, 2, 4, 6, 1, 1, 0, 2, 4, 6, 1, 1, 0, 2, + 4, 3, 2, 1, 0, 2, 4, 6, 3, 2, 1, 0, 2, 4, 6, 3, 2, 1, 6, 3, 3, 1, 0, 2, 4, + 7, 6, 3, 3, 1, 0, 2, 4, 7, 6, 6, 4, 3, 1, 0, 2, 4, 8, 6, 4, 3, 1, 0, 2, 4, + 8, 6, 7, 5, 3, 1, 0, 2, 4, 9, 7, 5, 3, 1, 0, 2, 4, 
9, 7, 6, 5, 3, 1, 0, 2, + 4, 6, 5, 3, 1, 0, 2, 4, 6, 5, 3, 7, 5, 3, 1, 0, 2, 4, 7, 5, 3, 1, 0, 2, 4, + 7, 5, 3, 9, 7, 5, 3, 1, 0, 2, 5, 9, 7, 5, 3, 1, 0, 2, 5, 9, 9, 7, 5, 3, 1, + 0, 2, 5, 8, 9, 7, 5, 3, 1, 0, 2, 5, 7, 5, 3, 1, 0, 2, 5, 9, 9, 7, 5, 3, 1, + 0, 2, 5, 9, 9, 5, 3, 1, 0, 2, 4, 9, 5, 3, 1, 0, 2, 4, 9, 5, 3, 6, 3, 1, 0, + 2, 6, 6, 3, 1, 0, 2, 6, 6, 3, 1, 0, 2, 7, 3, 1, 0, 3, 7, 7, 3, 1, 0, 3, 7, + 7, 3, 1, 0, 3, 6, 3, 1, 1, 3, 6, 6, 3, 1, 1, 3, 6, 6, 3, 1, 1, 3, 7, 3, 1, + 1, 3, 7, 7, 3, 1, 1, 3, 7, 7, 3, 1, 1, 3, 6, 3, 0, 1, 3, 6, 6, 3, 0, 1, 3, + 6, 6, 3, 0, 1, 3, 7, 2, 0, 1, 3, 5, 7, 2, 0, 1, 3, 5, 7, 2, 0, 1, 3, 5, 2, + 0, 1, 3, 5, 5, 2, 0, 1, 3, 5, 5, 2, 0, 1, 3, 4, 2, 0, 1, 3, 5, 8, 4, 2, 0, + 1, 3, 5, 8, 4, 2, 0, 5, 2, 0, 1, 3, 5, 7, 9, 5, 2, 0, 1, 3, 5, 7, 9, 5, 4, + 2, 0, 1, 3, 5, 7, 9, 4, 2, 0, 1, 3, 5, 7, 9, 4, 4, 2, 0, 1, 3, 5, 7, 9, 4, + 2, 0, 1, 3, 5, 7, 9, 4, 4, 2, 0, 1, 3, 5, 7, 4, 2, 0, 1, 3, 5, 7, 4, 2, 0, + 9, 4, 2, 0, 1, 3, 5, 6, 9, 4, 2, 0, 1, 3, 5, 6, 9, 9, 4, 2, 0, 1, 3, 5, 6, + 9, 4, 2, 0, 1, 3, 5, 6, 9, 8, 4, 2, 0, 1, 3, 4, 6, 8, 4, 2, 0, 1, 3, 4, 6, + 8, 6, 4, 2, 0, 1, 3, 3, 5, 6, 4, 2, 0, 1, 3, 3, 5, 6, 6, 4, 2, 0, 1, 2, 3, + 6, 4, 2, 0, 1, 2, 3, 6, 4, 2, 6, 4, 2, 0, 1, 1, 6, 4, 2, 0, 1, 1, 6, 4, 2, + 0, 1, 8, 6, 4, 2, 0, 0, 9, 8, 6, 4, 2, 0, 0, 9, 8, 6, 4, 8, 6, 4, 2, 0, 6, + 8, 6, 4, 2, 0, 6, 8, 6, 4, 2, 0, 2, 4, 5, 8, 3, 1, 6, 2, 4, 5, 8, 3, 1, 6, + 2, 4, 5, 7, 1, 1, 5, 0, 8, 7, 1, 1, 5, 0, 8, 7, 1, 1, 5, 0, 7, 1, 2, 8, 6, + 0, 5, 9, 8, 8, 7, 1, 2, 8, 6, 0, 5, 8, 2, 1, 4, 0, 6, 7, 9, 8, 2, 1, 4, 0, + 6, 7, 9, 8, 1, 0, 5, 5, 7, 1, 0, 5, 5, 7, 1, 0, 5, 5, 7, 1, 0, 4, 0, 2, 2, + 6, 6, 2, 8, 4, 0, 2, 2, 6, 6, 2, 8, 4, 4, 0, 2, 1, 4, 7, 4, 4, 5, 9, 9, 7, + 4, 0, 2, 1, 4, 5, 2, 0, 3, 9, 9, 4, 2, 7, 5, 4, 8, 9, 8, 6, 6, 5, 5, 7, 9, + 0, 0, 3, 3, 2, 6, 7, 5, 7, 9, 0, 0, 3, 3, 2, 5, 0, 6, 7, 2, 5, 0, 6, 7, 2, + 5, 0, 6, 7, 2, 5, 1, 1, 8, 5, 0, 4, 9, 7, 1, 1, 8, 5, 0, 4, 9, 7, 1, 8, 1, + 1, 7, 4, 0, 6, 9, 8, 1, 1, 7, 4, 0, 6, 9, 8, 7, 2, 1, 0, 6, 9, 8, 9, 7, 2, + 1, 0, 6, 9, 8, 9, 7, 8, 5, 4, 2, 2, 1, 6, 8, 5, 4, 2, 2, 1, 6, 8, 5, 4, 9, + 7, 6, 3, 0, 0, 3, 6, 2, 7, 9, 7, 6, 3, 0, 0, 3, 7, 3, 0, 3, 5, 2, 2, 9, 8, + 4, 5, 7, 6, 7, 9, 6, 7, 2, 0, 4, 2, 1, 3, 2, 7, 9, 5, 8, 2, 0, 4, 2, 1, 3, + 0, 4, 3, 1, 5, 2, 6, 8, 0, 4, 3, 1, 5, 2, 6, 8, 0, 5, 6, 5, 5, 1, 5, 8, 8, + 5, 6, 5, 5, 1, 5, 8, 8, 5, 0, 1, 9, 0, 1, 9, 0, 1, 9, 0, 1, 9, 0, 1, 9, 0, + 1, 7, 0, 1, 9, 9, 9, 9, 7, 0, 1, 9, 9, 9, 9, 7, 0, 1, 4, 0, 5, 2, 0, 2, 4, + 4, 0, 5, 2, 0, 2, 4, 4, 0, 5, 6, 5, 0, 8, 6, 6, 9, 6, 6, 5, 0, 8, 6, 6, 9, + 6, 6, 3, 2, 0, 2, 7, 7, 5, 7, 8, 3, 2, 0, 2, 7, 7, 5, 7, 2, 0, 2, 1, 1, 2, + 4, 3, 5, 7, 2, 0, 2, 1, 1, 2, 4, 4, 3, 7, 1, 0, 5, 4, 8, 8, 8, 4, 3, 7, 1, + 0, 5, 4, 7, 4, 7, 0, 9, 6, 6, 6, 7, 4, 7, 0, 9, 6, 6, 6, 7, 4, 5, 6, 7, 8, + 2, 5, 4, 1, 9, 6, 1, 9, 4, 5, 6, 7, 8, 9, 3, 4, 6, 2, 3, 1, 2, 9, 7, 4, 0, + 5, 8, 9, 3, 9, 6, 5, 6, 7, 7, 3, 1, 7, 4, 2, 3, 6, 4, 1, 4, 0, 8, 5, 3, 3, + 1, 8, 8, 9, 7, 3, 1, 0, 5, 8, 3, 8, 5, 8, 4, 2, 8, 4, 3, 9, 1, 1, 7, 8, 8, + 4, 2, 8, 4, 3, 9, 6, 5, 9, 7, 9, 6, 0, 0, 3, 5, 2, 9, 6, 5, 9, 7, 9, 3, 4, + 1, 5, 5, 3, 2, 1, 9, 3, 4, 1, 5, 5, 3, 2, 9, 8, 8, 9, 6, 7, 9, 9, 6, 0, 0, + 5, 6, 2, 4, 9, 8, 4, 8, 8, 2, 3, 2, 8, 1, 8, 1, 9, 4, 8, 8, 2, 3, 2, 3, 5, + 8, 8, 1, 3, 9, 0, 3, 7, 8, 5, 0, 5, 3, 5, 8, 9, 6, 5, 6, 8, 6, 1, 4, 7, 6, + 4, 2, 5, 4, 2, 4, 0, 9, 8, 6, 4, 3, 3, 4, 9, 1, 1, 0, 4, 7, 2, 9, 8, 6, 8, + 7, 7, 5, 4, 5, 2, 5, 8, 1, 1, 6, 7, 8, 7, 7, 5, 9, 8, 8, 9, 9, 7, 4, 7, 9, + 5, 0, 0, 1, 6, 3, 9, 8, 9, 5, 5, 2, 4, 3, 2, 3, 1, 9, 5, 5, 2, 
4, 3, 2, 3, + 6, 9, 9, 6, 8, 1, 0, 6, 8, 9, 5, 3, 4, 6, 9, 9, 6, 9, 8, 6, 6, 5, 6, 7, 8, + 4, 2, 0, 8, 7, 9, 8, 6, 6, 1, 5, 2, 7, 5, 3, 2, 0, 3, 1, 5, 2, 7, 5, 3, 2, + 0, 7, 4, 3, 4, 9, 7, 5, 1, 3, 7, 7, 6, 7, 2, 2, 3, 4, 6, 7, 4, 3, 4, 6, 9, + 0, 0, 9, 9, 6, 9, 7, 0, 7, 2, 8, 5, 3, 3, 3, 2, 5, 7, 6, 7, 8, 3, 2, 7, 4, + 4, 8, 5, 1, 6, 2, 3, 5, 0, 2, 3, 5, 1, 6, 2, 3, 5, 0, 2, 7, 6, 6, 6, 7, 8, + 9, 8, 4, 2, 8, 0, 8, 7, 6, 6, 6, 8, 7, 6, 5, 7, 8, 9, 3, 1, 1, 3, 1, 2, 8, + 7, 6, 5, 7, 5, 4, 5, 9, 7, 5, 5, 1, 4, 5, 1, 5, 7, 5, 4, 5, 8, 5, 4, 6, 8, + 8, 2, 2, 8, 4, 9, 0, 9, 8, 5, 4, 6, 9, 8, 4, 4, 6, 8, 5, 8, 2, 5, 5, 4, 6, + 1, 9, 8, 4, 9, 8, 5, 4, 6, 7, 1, 3, 1, 1, 3, 2, 9, 8, 5, 4, 6, 9, 8, 7, 7, + 8, 9, 9, 6, 0, 2, 8, 1, 5, 5, 9, 8, 7, 3, 6, 3, 0, 2, 8, 3, 4, 3, 6, 0, 3, + 6, 3, 0, 2, 8, 8, 6, 8, 1, 0, 1, 9, 6, 3, 6, 9, 6, 6, 9, 7, 1, 8, 6, 5, 6, + 2, 0, 3, 4, 3, 9, 5, 3, 0, 9, 6, 5, 6, 2, 9, 8, 8, 7, 7, 9, 9, 7, 2, 0, 1, + 8, 5, 5, 9, 8, 8, 9, 8, 9, 8, 1, 4, 0, 0, 4, 8, 1, 4, 7, 9, 8, 9, 8, 8, 9, + 9, 6, 4, 7, 7, 4, 0, 4, 7, 9, 1, 9, 6, 6, 8, 8, 9, 9, 4, 1, 8, 5, 0, 0, 4, + 1, 9, 8, 8, 9, 9, 4, 9, 7, 7, 8, 7, 7, 8, 5, 3, 0, 2, 3, 2, 0, 3, 9, 7, 7, + 7, 9, 8, 7, 7, 8, 4, 3, 0, 3, 4, 3, 0, 2, 7, 7}; + max_len_map_[98] = 17; + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool PIPNet::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // Resize + int resize_w = size_[0]; + int resize_h = size_[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + // RGR2RGB + BGR2RGB::Run(mat); + + // Normalize + Normalize::Run(mat, mean_vals_, std_vals_); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool PIPNet::Postprocess( + std::vector &infer_result, FaceAlignmentResult *result, + const std::map> &im_info) { + FDASSERT(infer_result.at(0).shape[0] == 1, "Only support batch = 1 now."); + if (infer_result.at(0).dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + auto iter_in = im_info.find("input_shape"); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); + int in_h = iter_in->second[0]; + int in_w = iter_in->second[1]; + GenerateLandmarks(infer_result, result, in_h, in_w); + + return true; +} + +bool PIPNet::Predict(cv::Mat *im, FaceAlignmentResult *result) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info)) { + FDERROR << "Failed to post process." 
<< std::endl; + return false; + } + return true; +} + +} // namespace facealign +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.h b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.h new file mode 100755 index 0000000000..55a4f724c0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet.h @@ -0,0 +1,133 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facealign { +/*! @brief PIPNet model object used when to load a PIPNet model exported by + * PIPNet. + */ +class ULTRAINFER_DECL PIPNet : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./pipnet.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + PIPNet(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "PIPNet"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceAlignmentResult *result); + + /** \brief Get the number of landmakrs + * + * \return Integer type, default num_landmarks = 19 + */ + int GetNumLandmarks() { return num_landmarks_; } + /** \brief Get the mean values for normalization + * + * \return Vector of float values, default mean_vals = {0.485f, 0.456f, + * 0.406f} + */ + std::vector GetMeanVals() { return mean_vals_; } + /** \brief Get the std values for normalization + * + * \return Vector of float values, default std_vals = {0.229f, 0.224f, 0.225f} + */ + std::vector GetStdVals() { return std_vals_; } + /** \brief Get the input size of image + * + * \return Vector of int values, default {256, 256} + */ + std::vector GetSize() { return size_; } + /** \brief Set the number of landmarks + * + * \param[in] num_landmarks Integer value which represents number of landmarks + */ + void SetNumLandmarks(const int &num_landmarks); + /** \brief Set the mean values for normalization + * + * \param[in] mean_vals Vector 
of float values whose length is equal to 3 + */ + void SetMeanVals(const std::vector &mean_vals) { + mean_vals_ = mean_vals; + } + /** \brief Set the std values for normalization + * + * \param[in] std_vals Vector of float values whose length is equal to 3 + */ + void SetStdVals(const std::vector &std_vals) { std_vals_ = std_vals; } + /** \brief Set the input size of image + * + * \param[in] size Vector of int values which represents {width, height} of + * image + */ + void SetSize(const std::vector &size) { size_ = size; } + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, + FaceAlignmentResult *result, + const std::map> &im_info); + void GenerateLandmarks(std::vector &infer_result, + FaceAlignmentResult *result, float img_height, + float img_width); + std::map num_lms_map_; + std::map max_len_map_; + std::map> reverse_index1_map_; + std::map> reverse_index2_map_; + int num_nb_; + int net_stride_; + // Now PIPNet support num_landmarks in {19, 29, 68, 98} + std::vector supported_num_landmarks_; + // tuple of (width, height), default (256, 256) + std::vector size_; + + // Mean parameters for normalize, size should be the the same as channels, + // default mean_vals = {0.485f, 0.456f, 0.406f} + std::vector mean_vals_; + // Std parameters for normalize, size should be the the same as channels, + // default std_vals = {0.229f, 0.224f, 0.225f} + std::vector std_vals_; + // number of landmarks + int num_landmarks_; +}; + +} // namespace facealign +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet_pybind.cc new file mode 100755 index 0000000000..cd379c4bf2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/contrib/pipnet_pybind.cc @@ -0,0 +1,39 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
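Since pipnet.h above only declares the C++ interface, a minimal usage sketch may help readers of this patch. It is illustrative only and is not code shipped in this PR: the umbrella header name, the Initialized() check inherited from UltraInferModel, the file paths, and the landmarks field of FaceAlignmentResult are assumptions inferred from the sibling models in this patch.

    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision.h"  // assumed umbrella header pulling in the facealign models

    int main() {
      // Load an ONNX PIPNet model; params_file stays empty for ONNX models.
      ultrainfer::vision::facealign::PIPNet model("./pipnet.onnx");
      if (!model.Initialized()) {  // Initialized() is assumed from the UltraInferModel base class
        std::cerr << "Failed to initialize PIPNet." << std::endl;
        return -1;
      }
      model.SetNumLandmarks(98);  // supported values: 19, 29, 68, 98 (default 19)

      cv::Mat im = cv::imread("face.jpg");
      ultrainfer::vision::FaceAlignmentResult res;
      if (!model.Predict(&im, &res)) {
        std::cerr << "Prediction failed." << std::endl;
        return -1;
      }
      // res.landmarks is expected to hold num_landmarks (x, y) points scaled back
      // to the original image size.
      std::cout << "Predicted " << res.landmarks.size() << " landmarks." << std::endl;
      return 0;
    }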
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPIPNet(pybind11::module &m) { + pybind11::class_(m, "PIPNet") + .def(pybind11::init()) + .def("predict", + [](vision::facealign::PIPNet &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceAlignmentResult res; + self.Predict(&mat, &res); + return res; + }) + .def_property("size", &vision::facealign::PIPNet::GetSize, + &vision::facealign::PIPNet::SetSize) + .def_property("mean_vals", &vision::facealign::PIPNet::GetMeanVals, + &vision::facealign::PIPNet::SetMeanVals) + .def_property("std_vals", &vision::facealign::PIPNet::GetStdVals, + &vision::facealign::PIPNet::SetStdVals) + .def_property("num_landmarks", + &vision::facealign::PIPNet::GetNumLandmarks, + &vision::facealign::PIPNet::SetNumLandmarks); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facealign/facealign_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facealign/facealign_pybind.cc new file mode 100755 index 0000000000..a1472d0ce4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facealign/facealign_pybind.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPFLD(pybind11::module &m); +void BindFaceLandmark1000(pybind11::module &m); +void BindPIPNet(pybind11::module &m); + +void BindFaceAlign(pybind11::module &m) { + auto facedet_module = m.def_submodule("facealign", "Face alignment models."); + BindPFLD(facedet_module); + BindFaceLandmark1000(facedet_module); + BindPIPNet(facedet_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.cc new file mode 100755 index 0000000000..cd203a1417 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/facedet/contrib/centerface/centerface.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +CenterFace::CenterFace(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool CenterFace::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool CenterFace::Predict(const cv::Mat &im, FaceDetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool CenterFace::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + FDASSERT(images.size() == 1, "Only support batch = 1 now."); + std::vector>> ims_info; + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.h new file mode 100755 index 0000000000..bd3dcd8ac4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/facedet/contrib/centerface/postprocessor.h" +#include "ultrainfer/vision/facedet/contrib/centerface/preprocessor.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { +/*! @brief CenterFace model object used when to load a CenterFace model exported + * by CenterFace. 
+ */ +class ULTRAINFER_DECL CenterFace : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./centerface.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + CenterFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() { return "centerface"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, FaceDetectionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *results); + + /// Get preprocessor reference of CenterFace + virtual CenterFacePreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of CenterFace + virtual CenterFacePostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + CenterFacePreprocessor preprocessor_; + CenterFacePostprocessor postprocessor_; +}; + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface_pybind.cc new file mode 100755 index 0000000000..d1a930fa62 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/centerface_pybind.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
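As an illustration of the centerface.h API declared above (again, not code contained in this patch): a sketch of single-image and batch prediction, together with the confidence/NMS threshold setters that the CenterFacePostprocessor introduced later in this patch exposes. The umbrella header name, file paths, and the Initialized() check are assumptions.

    #include <iostream>
    #include <vector>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision.h"  // assumed umbrella header

    int main() {
      ultrainfer::vision::facedet::CenterFace model("./centerface.onnx");
      if (!model.Initialized()) {  // assumed from the UltraInferModel base class
        std::cerr << "Failed to initialize CenterFace." << std::endl;
        return -1;
      }
      // Optional tuning through the postprocessor accessor declared above
      // (defaults are conf_threshold 0.5 and nms_threshold 0.3).
      model.GetPostprocessor().SetConfThreshold(0.6f);
      model.GetPostprocessor().SetNMSThreshold(0.3f);

      cv::Mat im = cv::imread("faces.jpg");
      ultrainfer::vision::FaceDetectionResult res;
      if (!model.Predict(im, &res)) {
        std::cerr << "Prediction failed." << std::endl;
        return -1;
      }
      // One box/score per detected face, with 5 landmark points per face by default.
      std::cout << "Detected " << res.boxes.size() << " faces." << std::endl;

      // BatchPredict currently only accepts a batch of size 1.
      std::vector<ultrainfer::vision::FaceDetectionResult> batch_res;
      model.BatchPredict({im}, &batch_res);
      return 0;
    }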
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindCenterFace(pybind11::module &m) { + pybind11::class_( + m, "CenterFacePreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::facedet::CenterFacePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error("Failed to preprocess the input data " + "in CenterFacePreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::facedet::CenterFacePreprocessor::GetSize, + &vision::facedet::CenterFacePreprocessor::SetSize); + + pybind11::class_( + m, "CenterFacePostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::facedet::CenterFacePostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in CenterFacePostprocessor."); + } + return results; + }) + .def("run", + [](vision::facedet::CenterFacePostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in CenterFacePostprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::facedet::CenterFacePostprocessor::GetConfThreshold, + &vision::facedet::CenterFacePostprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::facedet::CenterFacePostprocessor::GetNMSThreshold, + &vision::facedet::CenterFacePostprocessor::SetNMSThreshold); + + pybind11::class_(m, + "CenterFace") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::CenterFace &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::facedet::CenterFace &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::facedet::CenterFace::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::facedet::CenterFace::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.cc new file mode 100755 index 0000000000..f4d4637a6c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.cc @@ -0,0 +1,151 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/centerface/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +CenterFacePostprocessor::CenterFacePostprocessor() { + conf_threshold_ = 0.5; + nms_threshold_ = 0.3; + landmarks_per_face_ = 5; +} + +bool CenterFacePostprocessor::Run( + const std::vector &infer_result, + std::vector *results, + const std::vector>> &ims_info) { + int batch = infer_result[0].shape[0]; + + results->resize(batch); + FDTensor heatmap = infer_result[0]; //(1 1 160 160) + FDTensor scales = infer_result[1]; //(1 2 160 160) + FDTensor offsets = infer_result[2]; //(1 2 160 160) + FDTensor landmarks = infer_result[3]; //(1 10 160 160) + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + (*results)[bs].landmarks_per_face = landmarks_per_face_; + (*results)[bs].Reserve(heatmap.shape[2]); + if (infer_result[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + int fea_h = heatmap.shape[2]; + int fea_w = heatmap.shape[3]; + int spacial_size = fea_w * fea_h; + + float *heatmap_out = static_cast(heatmap.Data()); + + float *scale0 = static_cast(scales.Data()); + float *scale1 = scale0 + spacial_size; + + float *offset0 = static_cast(offsets.Data()); + float *offset1 = offset0 + spacial_size; + float confidence = 0.f; + + std::vector ids; + for (int i = 0; i < fea_h; i++) { + for (int j = 0; j < fea_w; j++) { + if (heatmap_out[i * fea_w + j] > conf_threshold_) { + ids.push_back(i); + ids.push_back(j); + } + } + } + + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale_h = ipt_h / out_h; + float scale_w = ipt_w / out_w; + + for (int i = 0; i < ids.size() / 2; i++) { + int id_h = ids[2 * i]; + int id_w = ids[2 * i + 1]; + int index = id_h * fea_w + id_w; + confidence = heatmap_out[index]; + + float s0 = std::exp(scale0[index]) * 4; + float s1 = std::exp(scale1[index]) * 4; + float o0 = offset0[index]; + float o1 = offset1[index]; + + float x1 = (id_w + o1 + 0.5) * 4 - s1 / 2 > 0.f + ? (id_w + o1 + 0.5) * 4 - s1 / 2 + : 0; + float y1 = (id_h + o0 + 0.5) * 4 - s0 / 2 > 0 + ? (id_h + o0 + 0.5) * 4 - s0 / 2 + : 0; + float x2 = 0, y2 = 0; + x1 = x1 < (float)out_w ? x1 : (float)out_w; + y1 = y1 < (float)out_h ? y1 : (float)out_h; + x2 = x1 + s1 < (float)out_w ? x1 + s1 : (float)out_w; + y2 = y1 + s0 < (float)out_h ? 
y1 + s0 : (float)out_h; + + (*results)[bs].boxes.emplace_back(std::array{x1, y1, x2, y2}); + (*results)[bs].scores.push_back(confidence); + // decode landmarks (default 5 landmarks) + if (landmarks_per_face_ > 0) { + // reference: utils/box_utils.py#L241 + for (size_t j = 0; j < landmarks_per_face_; j++) { + float *xmap = (float *)landmarks.Data() + (2 * j + 1) * spacial_size; + float *ymap = (float *)landmarks.Data() + (2 * j) * spacial_size; + float lx = (x1 + xmap[index] * s1) * scale_w; + float ly = (y1 + ymap[index] * s0) * scale_h; + (*results)[bs].landmarks.emplace_back(std::array{lx, ly}); + } + } + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + (*results)[bs].boxes[i][0] = + std::max((*results)[bs].boxes[i][0] * scale_w, 0.0f); + (*results)[bs].boxes[i][1] = + std::max((*results)[bs].boxes[i][1] * scale_h, 0.0f); + (*results)[bs].boxes[i][2] = + std::max((*results)[bs].boxes[i][2] * scale_w, 0.0f); + (*results)[bs].boxes[i][3] = + std::max((*results)[bs].boxes[i][3] * scale_h, 0.0f); + (*results)[bs].boxes[i][0] = + std::min((*results)[bs].boxes[i][0], ipt_w - 1.0f); + (*results)[bs].boxes[i][1] = + std::min((*results)[bs].boxes[i][1], ipt_h - 1.0f); + (*results)[bs].boxes[i][2] = + std::min((*results)[bs].boxes[i][2], ipt_w - 1.0f); + (*results)[bs].boxes[i][3] = + std::min((*results)[bs].boxes[i][3], ipt_h - 1.0f); + } + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.h new file mode 100755 index 0000000000..03f33f3704 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/postprocessor.h @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL CenterFacePostprocessor { +public: + /*! @brief Postprocessor object for CenterFace serials model. 
+ */ + CenterFacePostprocessor(); + + /** \brief Process the result of runtime and fill to FaceDetectionResult + * structure + * + * \param[in] infer_result The inference result from runtime + * \param[in] results The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &infer_result, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.5 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.5 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.3 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.3 + float GetNMSThreshold() const { return nms_threshold_; } + +protected: + float conf_threshold_; + float nms_threshold_; + int landmarks_per_face_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.cc new file mode 100755 index 0000000000..9f4b5b1ecc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/centerface/preprocessor.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/vision/common/processors/mat.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +CenterFacePreprocessor::CenterFacePreprocessor() { size_ = {640, 640}; } + +bool CenterFacePreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); i++) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +bool CenterFacePreprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // centerface's preprocess steps + // 1. Resize + // 2. 
ConvertAndPermute + Resize::Run(mat, size_[0], size_[1]); + std::vector alpha = {1.0f, 1.0f, 1.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); + return true; +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.h new file mode 100755 index 0000000000..d84d690b1e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/centerface/preprocessor.h @@ -0,0 +1,59 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL CenterFacePreprocessor { +public: + /** \brief Create a preprocessor instance for CenterFace serials model + */ + CenterFacePreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \ret + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + // target size, tuple of (width, height), default size = {640, 640} + std::vector size_; +}; + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.cc new file mode 100755 index 0000000000..878b1c70d8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.cc @@ -0,0 +1,293 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/retinaface.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +struct RetinaAnchor { + float cx; + float cy; + float s_kx; + float s_ky; +}; + +void GenerateRetinaAnchors(const std::vector &size, + const std::vector &downsample_strides, + const std::vector> &min_sizes, + std::vector *anchors) { + // size: tuple of input (width, height) + // downsample_strides: downsample strides (steps), e.g (8,16,32) + // min_sizes: width and height for each anchor, + // e.g {{16, 32}, {64, 128}, {256, 512}} + int h = size[1]; + int w = size[0]; + std::vector> feature_maps; + for (auto s : downsample_strides) { + feature_maps.push_back( + {static_cast( + std::ceil(static_cast(h) / static_cast(s))), + static_cast( + std::ceil(static_cast(w) / static_cast(s)))}); + } + + (*anchors).clear(); + const size_t num_feature_map = feature_maps.size(); + // reference: layers/functions/prior_box.py#L21 + for (size_t k = 0; k < num_feature_map; ++k) { + auto f_map = feature_maps.at(k); // e.g [640//8,640//8] + auto tmp_min_sizes = min_sizes.at(k); // e.g [8,16] + int f_h = f_map.at(0); + int f_w = f_map.at(1); + for (size_t i = 0; i < f_h; ++i) { + for (size_t j = 0; j < f_w; ++j) { + for (auto min_size : tmp_min_sizes) { + float s_kx = + static_cast(min_size) / static_cast(w); // e.g 16/w + float s_ky = + static_cast(min_size) / static_cast(h); // e.g 16/h + // (x + 0.5) * step / w normalized loc mapping to input width + // (y + 0.5) * step / h normalized loc mapping to input height + float s = static_cast(downsample_strides.at(k)); + float cx = (static_cast(j) + 0.5f) * s / static_cast(w); + float cy = (static_cast(i) + 0.5f) * s / static_cast(h); + (*anchors).emplace_back( + RetinaAnchor{cx, cy, s_kx, s_ky}); // without clip + } + } + } + } +} + +RetinaFace::RetinaFace(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool RetinaFace::Initialize() { + // parameters for preprocess + size = {640, 640}; + variance = {0.1f, 0.2f}; + downsample_strides = {8, 16, 32}; + min_sizes = {{16, 32}, {64, 128}, {256, 512}}; + landmarks_per_face = 5; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + return true; +} + +bool RetinaFace::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // retinaface's preprocess steps + // 1. Resize + // 2. Convert(opencv style) or Normalize + // 3. HWC->CHW + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + // Compute `result = mat * alpha + beta` directly by channel + // Reference: detect.py#L94 + std::vector alpha = {1.f, 1.f, 1.f}; + std::vector beta = {-104.f, -117.f, -123.f}; // BGR; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool RetinaFace::Postprocess( + std::vector &infer_result, FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + // retinaface has 3 output tensors, boxes & conf & landmarks + FDASSERT( + (infer_result.size() == 3), + "The default number of output tensor must be 3 according to retinaface."); + FDTensor &boxes_tensor = infer_result.at(0); // (1,n,4) + FDTensor &conf_tensor = infer_result.at(1); // (1,n,2) + FDTensor &landmarks_tensor = infer_result.at(2); // (1,n,10) + FDASSERT((boxes_tensor.shape[0] == 1), "Only support batch =1 now."); + if (boxes_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + + result->Clear(); + // must be setup landmarks_per_face before reserve + result->landmarks_per_face = landmarks_per_face; + result->Reserve(boxes_tensor.shape[1]); + + float *boxes_ptr = static_cast(boxes_tensor.Data()); + float *conf_ptr = static_cast(conf_tensor.Data()); + float *landmarks_ptr = static_cast(landmarks_tensor.Data()); + const size_t num_bboxes = boxes_tensor.shape[1]; // n + // fetch original image shape + auto iter_ipt = im_info.find("input_shape"); + FDASSERT((iter_ipt != im_info.end()), + "Cannot find input_shape from im_info."); + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + + // generate anchors with dowmsample strides + std::vector anchors; + GenerateRetinaAnchors(size, downsample_strides, min_sizes, &anchors); + + // decode bounding boxes + for (size_t i = 0; i < num_bboxes; ++i) { + float confidence = conf_ptr[2 * i + 1]; + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + float prior_cx = anchors.at(i).cx; + float prior_cy = anchors.at(i).cy; + float prior_s_kx = anchors.at(i).s_kx; + float prior_s_ky = anchors.at(i).s_ky; + + // fetch offsets (dx,dy,dw,dh) + float dx = boxes_ptr[4 * i + 0]; + float dy = boxes_ptr[4 * i + 1]; + float dw = boxes_ptr[4 * i + 2]; + float dh = boxes_ptr[4 * i + 3]; + // reference: Pytorch_Retinaface/utils/box_utils.py + float x = prior_cx + dx * variance[0] * prior_s_kx; + float y = prior_cy + dy * variance[0] * prior_s_ky; + float w = prior_s_kx * std::exp(dw * variance[1]); + float h = prior_s_ky * std::exp(dh * variance[1]); // (0.~1.) + // from (x,y,w,h) to (x1,y1,x2,y2) + float x1 = (x - w / 2.f) * ipt_w; + float y1 = (y - h / 2.f) * ipt_h; + float x2 = (x + w / 2.f) * ipt_w; + float y2 = (y + h / 2.f) * ipt_h; + result->boxes.emplace_back(std::array{x1, y1, x2, y2}); + result->scores.push_back(confidence); + // decode landmarks (default 5 landmarks) + if (landmarks_per_face > 0) { + // reference: utils/box_utils.py#L241 + for (size_t j = 0; j < landmarks_per_face * 2; j += 2) { + float ldx = landmarks_ptr[i * (landmarks_per_face * 2) + (j + 0)]; + float ldy = landmarks_ptr[i * (landmarks_per_face * 2) + (j + 1)]; + float lx = (prior_cx + ldx * variance[0] * prior_s_kx) * ipt_w; + float ly = (prior_cy + ldy * variance[0] * prior_s_ky) * ipt_h; + result->landmarks.emplace_back(std::array{lx, ly}); + } + } + } + + if (result->boxes.size() == 0) { + return true; + } + + utils::NMS(result, nms_iou_threshold); + + // scale and clip box + for (size_t i = 0; i < result->boxes.size(); ++i) { + result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + // scale and clip landmarks + for (size_t i = 0; i < result->landmarks.size(); ++i) { + result->landmarks[i][0] = std::max(result->landmarks[i][0], 0.0f); + result->landmarks[i][1] = std::max(result->landmarks[i][1], 0.0f); + result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f); + result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f); + } + return true; +} + +bool 
RetinaFace::Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.h new file mode 100755 index 0000000000..12817516ce --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface.h @@ -0,0 +1,104 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { +/** \brief All object face detection model APIs are defined inside this + * namespace + * + */ +namespace facedet { +/*! @brief RetinaFace model object used when to load a RetinaFace model exported + * by RetinaFace. + */ +class ULTRAINFER_DECL RetinaFace : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./retinaface.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + RetinaFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "Pytorch_Retinaface"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \param[in] conf_threshold + * confidence threashold for postprocessing, default is 0.25 \param[in] + * nms_iou_threshold iou threashold for NMS, default is 0.4 \return true if + * the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold = 0.25f, + float nms_iou_threshold = 0.4f); + + /*! @brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default (640, 640) + */ + std::vector size; + /*! @brief + Argument for image postprocessing step, variance in RetinaFace's + prior-box(anchor) generate process, default (0.1, 0.2) + */ + std::vector variance; + /*! @brief + Argument for image postprocessing step, downsample strides (namely, steps) for + RetinaFace to generate anchors, will take (8,16,32) as default values + */ + std::vector downsample_strides; + /*! @brief + Argument for image postprocessing step, min sizes, width and height for each + anchor, default min_sizes = {{16, 32}, {64, 128}, {256, 512}} + */ + std::vector> min_sizes; + /*! @brief + Argument for image postprocessing step, landmarks_per_face, default 5 in + RetinaFace + */ + int landmarks_per_face; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, + FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + bool is_dynamic_input_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface_pybind.cc new file mode 100755 index 0000000000..dd63bf7430 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/retinaface_pybind.cc @@ -0,0 +1,39 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/pybind/main.h"
+
+namespace ultrainfer {
+void BindRetinaFace(pybind11::module &m) {
+  pybind11::class_<vision::facedet::RetinaFace, UltraInferModel>(m,
+                                                                 "RetinaFace")
+      .def(pybind11::init<std::string, std::string, RuntimeOption,
+                          ModelFormat>())
+      .def("predict",
+           [](vision::facedet::RetinaFace &self, pybind11::array &data,
+              float conf_threshold, float nms_iou_threshold) {
+             auto mat = PyArrayToCvMat(data);
+             vision::FaceDetectionResult res;
+             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
+             return res;
+           })
+      .def_readwrite("size", &vision::facedet::RetinaFace::size)
+      .def_readwrite("variance", &vision::facedet::RetinaFace::variance)
+      .def_readwrite("downsample_strides",
+                     &vision::facedet::RetinaFace::downsample_strides)
+      .def_readwrite("min_sizes", &vision::facedet::RetinaFace::min_sizes)
+      .def_readwrite("landmarks_per_face",
+                     &vision::facedet::RetinaFace::landmarks_per_face);
+}
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.cc
new file mode 100755
index 0000000000..83ec974117
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.cc
@@ -0,0 +1,375 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
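[Editor's note, before the SCRFD sources that follow: the RetinaFace post-processing above combines a normalized prior (cx, cy, s_kx, s_ky) with regression offsets and the variances {0.1, 0.2}. The standalone sketch below reproduces that arithmetic for the default 640x640 configuration; the offsets (dx, dy, dw, dh) are hypothetical values, not real model output.]

// Standalone sketch of the prior-box decode used by RetinaFace::Postprocess,
// assuming the defaults set in Initialize(): size {640, 640}, strides {8, 16, 32},
// min_sizes {{16, 32}, {64, 128}, {256, 512}} and variance {0.1, 0.2}.
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const int w = 640, h = 640;
  const std::vector<int> strides = {8, 16, 32};
  const std::vector<std::vector<int>> min_sizes = {{16, 32}, {64, 128}, {256, 512}};
  // Total number of anchors: sum over strides of ceil(w/s) * ceil(h/s) * anchors_per_cell.
  int total = 0;
  for (std::size_t k = 0; k < strides.size(); ++k) {
    int fw = static_cast<int>(std::ceil(static_cast<float>(w) / strides[k]));
    int fh = static_cast<int>(std::ceil(static_cast<float>(h) / strides[k]));
    total += fw * fh * static_cast<int>(min_sizes[k].size());
  }
  std::printf("total anchors: %d\n", total);  // 16800 for a 640x640 input

  // Decode one box from hypothetical offsets against the first anchor of the
  // stride-8 map (i = j = 0, min_size = 16).
  const float variance[2] = {0.1f, 0.2f};
  float cx = 0.5f * 8 / w, cy = 0.5f * 8 / h, s_kx = 16.f / w, s_ky = 16.f / h;
  float dx = 0.3f, dy = -0.2f, dw = 0.1f, dh = 0.4f;  // hypothetical model output
  float x = cx + dx * variance[0] * s_kx;
  float y = cy + dy * variance[0] * s_ky;
  float bw = s_kx * std::exp(dw * variance[1]);
  float bh = s_ky * std::exp(dh * variance[1]);
  std::printf("box (x1,y1,x2,y2) in pixels: %.2f %.2f %.2f %.2f\n",
              (x - bw / 2) * w, (y - bh / 2) * h, (x + bw / 2) * w, (y + bh / 2) * h);
}

[For a 640x640 input the three stride maps contribute 2 * (80*80 + 40*40 + 20*20) = 16800 anchors, which is the row count expected in the (1, n, 4) boxes tensor.]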
+ +#include "ultrainfer/vision/facedet/contrib/scrfd.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +void SCRFD::LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill, bool scale_up, int stride) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +SCRFD::SCRFD(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool SCRFD::Initialize() { + // parameters for preprocess + use_kps = true; + size = {640, 640}; + padding_value = {0.0, 0.0, 0.0}; + is_mini_pad = false; + is_no_pad = false; + is_scale_up = false; + stride = 32; + downsample_strides = {8, 16, 32}; + num_anchors = 2; + landmarks_per_face = 5; + center_points_is_update_ = false; + max_nms = 30000; + // num_outputs = use_kps ? 9 : 6; + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + + return true; +} + +bool SCRFD::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_LINEAR; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // scrfd's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. 
HWC->CHW + SCRFD::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, + is_scale_up, stride); + + BGR2RGB::Run(mat); + if (!disable_normalize_) { + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + // Original Repo/tools/scrfd.py: cv2.dnn.blobFromImage(img, 1.0/128, + // input_size, (127.5, 127.5, 127.5), swapRB=True) + std::vector alpha = {1.f / 128.f, 1.f / 128.f, 1.f / 128.f}; + std::vector beta = {-127.5f / 128.f, -127.5f / 128.f, + -127.5f / 128.f}; + Convert::Run(mat, alpha, beta); + } + + if (!disable_permute_) { + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + } + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +void SCRFD::GeneratePoints() { + if (center_points_is_update_ && !is_dynamic_input_) { + return; + } + // 8, 16, 32 + for (auto local_stride : downsample_strides) { + unsigned int num_grid_w = size[0] / local_stride; + unsigned int num_grid_h = size[1] / local_stride; + // y + for (unsigned int i = 0; i < num_grid_h; ++i) { + // x + for (unsigned int j = 0; j < num_grid_w; ++j) { + // num_anchors, col major + for (unsigned int k = 0; k < num_anchors; ++k) { + SCRFDPoint point; + point.cx = static_cast(j); + point.cy = static_cast(i); + center_points_[local_stride].push_back(point); + } + } + } + } + + center_points_is_update_ = true; +} + +bool SCRFD::Postprocess( + std::vector &infer_result, FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + // number of downsample_strides + int fmc = downsample_strides.size(); + // scrfd has 6,9,10,15 output tensors + FDASSERT((infer_result.size() == 9 || infer_result.size() == 6 || + infer_result.size() == 10 || infer_result.size() == 15), + "The default number of output tensor must be 6, 9, 10, or 15 " + "according to scrfd."); + FDASSERT((fmc == 3 || fmc == 5), "The fmc must be 3 or 5"); + FDASSERT((infer_result.at(0).shape[0] == 1), "Only support batch =1 now."); + for (int i = 0; i < fmc; ++i) { + if (infer_result.at(i).dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + } + int total_num_boxes = 0; + // compute the reserve space. 
+ for (int f = 0; f < fmc; ++f) { + total_num_boxes += infer_result.at(f).shape[1]; + }; + GeneratePoints(); + result->Clear(); + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + if (!is_scale_up) { + scale = std::min(scale, 1.0f); + } + float pad_h = (out_h - ipt_h * scale) / 2.0f; + float pad_w = (out_w - ipt_w * scale) / 2.0f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + // must be setup landmarks_per_face before reserve + if (use_kps) { + result->landmarks_per_face = landmarks_per_face; + } else { + // force landmarks_per_face = 0, if use_kps has been set as 'false'. + result->landmarks_per_face = 0; + } + + result->Reserve(total_num_boxes); + unsigned int count = 0; + // loop each stride + for (int f = 0; f < fmc; ++f) { + float *score_ptr = static_cast(infer_result.at(f).Data()); + float *bbox_ptr = static_cast(infer_result.at(f + fmc).Data()); + const unsigned int num_points = infer_result.at(f).shape[1]; + int current_stride = downsample_strides[f]; + auto &stride_points = center_points_[current_stride]; + // loop each anchor + for (unsigned int i = 0; i < num_points; ++i) { + const float cls_conf = score_ptr[i]; + if (cls_conf < conf_threshold) + continue; // filter + auto &point = stride_points.at(i); + const float cx = point.cx; // cx + const float cy = point.cy; // cy + // bbox + const float *offsets = bbox_ptr + i * 4; + float l = offsets[0]; // left + float t = offsets[1]; // top + float r = offsets[2]; // right + float b = offsets[3]; // bottom + + float x1 = ((cx - l) * static_cast(current_stride) - + static_cast(pad_w)) / + scale; // cx - l x1 + float y1 = ((cy - t) * static_cast(current_stride) - + static_cast(pad_h)) / + scale; // cy - t y1 + float x2 = ((cx + r) * static_cast(current_stride) - + static_cast(pad_w)) / + scale; // cx + r x2 + float y2 = ((cy + b) * static_cast(current_stride) - + static_cast(pad_h)) / + scale; // cy + b y2 + result->boxes.emplace_back(std::array{x1, y1, x2, y2}); + result->scores.push_back(cls_conf); + if (use_kps) { + float *landmarks_ptr = + static_cast(infer_result.at(f + 2 * fmc).Data()); + // landmarks + const float *kps_offsets = landmarks_ptr + i * (landmarks_per_face * 2); + for (unsigned int j = 0; j < landmarks_per_face * 2; j += 2) { + float kps_l = kps_offsets[j]; + float kps_t = kps_offsets[j + 1]; + float kps_x = ((cx + kps_l) * static_cast(current_stride) - + static_cast(pad_w)) / + scale; // cx + l x + float kps_y = ((cy + kps_t) * static_cast(current_stride) - + static_cast(pad_h)) / + scale; // cy + t y + result->landmarks.emplace_back(std::array{kps_x, kps_y}); + } + } + count += 1; // limit boxes for nms. 
+ if (count > max_nms) { + break; + } + } + } + + // fetch original image shape + FDASSERT((iter_ipt != im_info.end()), + "Cannot find input_shape from im_info."); + + if (result->boxes.size() == 0) { + return true; + } + + utils::NMS(result, nms_iou_threshold); + + // scale and clip box + for (size_t i = 0; i < result->boxes.size(); ++i) { + result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + // scale and clip landmarks + if (use_kps) { + for (size_t i = 0; i < result->landmarks.size(); ++i) { + result->landmarks[i][0] = std::max(result->landmarks[i][0], 0.0f); + result->landmarks[i][1] = std::max(result->landmarks[i][1], 0.0f); + result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f); + result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f); + } + } + return true; +} + +bool SCRFD::Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +void SCRFD::DisableNormalize() { disable_normalize_ = true; } + +void SCRFD::DisablePermute() { disable_permute_ = true; } +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.h new file mode 100755 index 0000000000..1b6ae30eac --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd.h @@ -0,0 +1,142 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
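[Editor's note, before the SCRFD header that follows: SCRFD::Postprocess above decodes left/top/right/bottom distances from each grid cell, scales them by the stride, then undoes the letterbox padding and resize scale. The sketch below illustrates that un-mapping with made-up pad, scale, and offset values for a hypothetical 1280x720 image letterboxed to 640x640.]

// Minimal sketch of SCRFD's distance-to-box decode plus the letterbox
// "un-mapping" done in Postprocess; all numeric values are illustrative.
#include <algorithm>
#include <cstdio>

int main() {
  // A 1280x720 image letterboxed to 640x640: scale = min(640/720, 640/1280) = 0.5,
  // so the resized content is 640x360 and (640 - 360) / 2 = 140 pixels of padding
  // are added on top and bottom.
  float scale = 0.5f, pad_w = 0.f, pad_h = 140.f;
  int stride = 8;
  float cx = 30.f, cy = 40.f;                  // grid-cell coords on the stride-8 map
  float l = 2.f, t = 3.f, r = 2.5f, b = 3.5f;  // hypothetical ltrb distances

  // Decode on the letterboxed image, then undo padding and resize.
  float x1 = ((cx - l) * stride - pad_w) / scale;
  float y1 = ((cy - t) * stride - pad_h) / scale;
  float x2 = ((cx + r) * stride - pad_w) / scale;
  float y2 = ((cy + b) * stride - pad_h) / scale;

  // Clip to the original image, mirroring the final loop in Postprocess.
  float ipt_w = 1280.f, ipt_h = 720.f;
  x1 = std::min(std::max(x1, 0.f), ipt_w - 1.f);
  y1 = std::min(std::max(y1, 0.f), ipt_h - 1.f);
  x2 = std::min(std::max(x2, 0.f), ipt_w - 1.f);
  y2 = std::min(std::max(y2, 0.f), ipt_h - 1.f);
  std::printf("box: %.1f %.1f %.1f %.1f\n", x1, y1, x2, y2);
}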
+ +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include + +namespace ultrainfer { + +namespace vision { + +namespace facedet { +/*! @brief SCRFD model object used when to load a SCRFD model exported by SCRFD. + */ +class ULTRAINFER_DECL SCRFD : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./scrfd.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + SCRFD(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "scrfd"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \param[in] conf_threshold + * confidence threashold for postprocessing, default is 0.25 \param[in] + * nms_iou_threshold iou threashold for NMS, default is 0.4 \return true if + * the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold = 0.25f, + float nms_iou_threshold = 0.4f); + + /*! @brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default (640, 640) + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + /*! @brief + Argument for image postprocessing step, downsample strides (namely, steps) for + SCRFD to generate anchors, will take (8,16,32) as default values + */ + std::vector downsample_strides; + /*! @brief + Argument for image postprocessing step, landmarks_per_face, default 5 in SCRFD + */ + int landmarks_per_face; + /*! @brief + Argument for image postprocessing step, the outputs of onnx file with key + points features or not, default true + */ + bool use_kps; + /*! @brief + Argument for image postprocessing step, the upperbond number of boxes + processed by nms, default 30000 + */ + int max_nms; + /*! @brief + Argument for image postprocessing step, anchor number of each stride, default + 2 + */ + unsigned int num_anchors; + + /// This function will disable normalize and hwc2chw in preprocessing step. + void DisableNormalize(); + + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute(); + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, + FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + void GeneratePoints(); + + void LetterBox(Mat *mat, const std::vector &size, + const std::vector &color, bool _auto, + bool scale_fill = false, bool scale_up = true, + int stride = 32); + + bool is_dynamic_input_; + + bool center_points_is_update_; + + typedef struct { + float cx; + float cy; + } SCRFDPoint; + + std::unordered_map> center_points_; + + // for recording the switch of normalize + bool disable_normalize_ = false; + // for recording the switch of hwc2chw + bool disable_permute_ = false; +}; +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd_pybind.cc new file mode 100755 index 0000000000..26650759a1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/scrfd_pybind.cc @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindSCRFD(pybind11::module &m) { + // Bind SCRFD + pybind11::class_(m, "SCRFD") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::SCRFD &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def("disable_normalize", &vision::facedet::SCRFD::DisableNormalize) + .def("disable_permute", &vision::facedet::SCRFD::DisablePermute) + .def_readwrite("size", &vision::facedet::SCRFD::size) + .def_readwrite("padding_value", &vision::facedet::SCRFD::padding_value) + .def_readwrite("is_mini_pad", &vision::facedet::SCRFD::is_mini_pad) + .def_readwrite("is_no_pad", &vision::facedet::SCRFD::is_no_pad) + .def_readwrite("is_scale_up", &vision::facedet::SCRFD::is_scale_up) + .def_readwrite("stride", &vision::facedet::SCRFD::stride) + .def_readwrite("use_kps", &vision::facedet::SCRFD::use_kps) + .def_readwrite("max_nms", &vision::facedet::SCRFD::max_nms) + .def_readwrite("downsample_strides", + &vision::facedet::SCRFD::downsample_strides) + .def_readwrite("num_anchors", &vision::facedet::SCRFD::num_anchors) + .def_readwrite("landmarks_per_face", + &vision::facedet::SCRFD::landmarks_per_face); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.cc new file mode 100755 index 0000000000..b4e7e991d6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.cc @@ -0,0 +1,203 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/ultraface.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +UltraFace::UltraFace(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool UltraFace::Initialize() { + // parameters for preprocess + size = {320, 240}; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + return true; +} + +bool UltraFace::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // ultraface's preprocess steps + // 1. resize + // 2. BGR->RGB + // 3. 
HWC->CHW + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + BGR2RGB::Run(mat); + // Compute `result = mat * alpha + beta` directly by channel + // Reference: detect_imgs_onnx.py#L73 + std::vector alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f}; + std::vector beta = {-127.0f * (1.0f / 128.0f), + -127.0f * (1.0f / 128.0f), + -127.0f * (1.0f / 128.0f)}; // RGB; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool UltraFace::Postprocess( + std::vector &infer_result, FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + // ultraface has 2 output tensors, scores & boxes + FDASSERT( + (infer_result.size() == 2), + "The default number of output tensor must be 2 according to ultraface."); + FDTensor &scores_tensor = infer_result.at(0); // (1,4420,2) + FDTensor &boxes_tensor = infer_result.at(1); // (1,4420,4) + FDASSERT((scores_tensor.shape[0] == 1), "Only support batch =1 now."); + FDASSERT((boxes_tensor.shape[0] == 1), "Only support batch =1 now."); + if (scores_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + if (boxes_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + result->Clear(); + // must be setup landmarks_per_face before reserve. + // ultraface detector does not detect landmarks by default. 
+ result->landmarks_per_face = 0; + result->Reserve(boxes_tensor.shape[1]); + + float *scores_ptr = static_cast(scores_tensor.Data()); + float *boxes_ptr = static_cast(boxes_tensor.Data()); + const size_t num_bboxes = boxes_tensor.shape[1]; // e.g 4420 + // fetch original image shape + auto iter_ipt = im_info.find("input_shape"); + FDASSERT((iter_ipt != im_info.end()), + "Cannot find input_shape from im_info."); + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + + // decode bounding boxes + for (size_t i = 0; i < num_bboxes; ++i) { + float confidence = scores_ptr[2 * i + 1]; + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + float x1 = boxes_ptr[4 * i + 0] * ipt_w; + float y1 = boxes_ptr[4 * i + 1] * ipt_h; + float x2 = boxes_ptr[4 * i + 2] * ipt_w; + float y2 = boxes_ptr[4 * i + 3] * ipt_h; + result->boxes.emplace_back(std::array{x1, y1, x2, y2}); + result->scores.push_back(confidence); + } + + if (result->boxes.size() == 0) { + return true; + } + + utils::NMS(result, nms_iou_threshold); + + // scale and clip box + for (size_t i = 0; i < result->boxes.size(); ++i) { + result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f); + result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f); + result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f); + result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + return true; +} + +bool UltraFace::Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.h new file mode 100755 index 0000000000..2ca7d60994 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface.h @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { +/*! @brief UltraFace model object used when to load a UltraFace model exported + * by UltraFace. + */ +class ULTRAINFER_DECL UltraFace : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./ultraface.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + UltraFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { + return "Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB"; + } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \param[in] conf_threshold + * confidence threashold for postprocessing, default is 0.7 \param[in] + * nms_iou_threshold iou threashold for NMS, default is 0.3 \return true if + * the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold = 0.7f, + float nms_iou_threshold = 0.3f); + + /*! @brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default (320, 240) + */ + std::vector size; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, + FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + bool is_dynamic_input_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface_pybind.cc new file mode 100755 index 0000000000..dd6ad88e8b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/ultraface_pybind.cc @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
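[Editor's note on UltraFace::Preprocess above: the per-channel Convert parameters alpha = 1/128 and beta = -127/128 are the usual (pixel - mean) / std normalization folded into a single multiply-add. A minimal check, assuming mean 127 and std 128 as in the reference implementation:]

// Verifies that pixel * (1/128) + (-127/128) equals (pixel - 127) / 128.
#include <cstdio>

int main() {
  const float alpha = 1.0f / 128.0f;
  const float beta = -127.0f / 128.0f;
  const float pixels[] = {0.0f, 127.0f, 255.0f};
  for (float pixel : pixels) {
    float via_convert = pixel * alpha + beta;
    float via_mean_std = (pixel - 127.0f) / 128.0f;
    std::printf("pixel %5.1f -> %+.4f (convert) vs %+.4f (mean/std)\n",
                pixel, via_convert, via_mean_std);
  }
}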
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindUltraFace(pybind11::module &m) { + pybind11::class_(m, "UltraFace") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::UltraFace &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def_readwrite("size", &vision::facedet::UltraFace::size); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.cc new file mode 100755 index 0000000000..3848c131c3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.cc @@ -0,0 +1,280 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/yolov5face.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +void LetterBox(Mat *mat, std::vector size, std::vector color, + bool _auto, bool scale_fill = false, bool scale_up = true, + int stride = 32) { + float scale = + std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); + if (!scale_up) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size[0] - resize_w; + int pad_h = size[1] - resize_h; + if (_auto) { + pad_h = pad_h % stride; + pad_w = pad_w % stride; + } else if (scale_fill) { + pad_h = 0; + pad_w = 0; + resize_h = size[1]; + resize_w = size[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, color); + } +} + +YOLOv5Face::YOLOv5Face(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv5Face::Initialize() { + // parameters for preprocess + size = {640, 640}; + padding_value = {114.0, 114.0, 114.0}; + is_mini_pad = false; + is_no_pad 
= false; + is_scale_up = false; + stride = 32; + landmarks_per_face = 5; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // Check if the input shape is dynamic after Runtime already initialized, + // Note that, We need to force is_mini_pad 'false' to keep static + // shape after padding (LetterBox) when the is_dynamic_input_ is 'false'. + is_dynamic_input_ = false; + auto shape = InputInfoOfRuntime(0).shape; + for (int i = 0; i < shape.size(); ++i) { + // if height or width is dynamic + if (i >= 2 && shape[i] <= 0) { + is_dynamic_input_ = true; + break; + } + } + if (!is_dynamic_input_) { + is_mini_pad = false; + } + return true; +} + +bool YOLOv5Face::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // process after image load + float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), + size[0] * 1.0f / static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_LINEAR; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(round(static_cast(mat->Height()) * ratio)); + int resize_w = int(round(static_cast(mat->Width()) * ratio)); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // yolov5face's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. HWC->CHW + LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up, + stride); + BGR2RGB::Run(mat); + // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), + // std::vector(mat->Channels(), 1.0)); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool YOLOv5Face::Postprocess( + FDTensor &infer_result, FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold) { + // infer_result: (1,n,16) 16=4+1+10+1 + FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + + result->Clear(); + // must be setup landmarks_per_face before reserve + result->landmarks_per_face = landmarks_per_face; + result->Reserve(infer_result.shape[1]); + + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < infer_result.shape[1]; ++i) { + float *reg_cls_ptr = data + (i * infer_result.shape[2]); + float obj_conf = reg_cls_ptr[4]; + float cls_conf = reg_cls_ptr[15]; + float confidence = obj_conf * cls_conf; + // filter boxes by conf_threshold + if (confidence <= conf_threshold) { + continue; + } + float x = reg_cls_ptr[0]; + float y = reg_cls_ptr[1]; + float w = reg_cls_ptr[2]; + float h = reg_cls_ptr[3]; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + result->boxes.emplace_back(std::array{ + (x - w / 2.f), (y - h / 2.f), (x + w / 2.f), (y + h / 2.f)}); + result->scores.push_back(confidence); + // decode landmarks (default 5 landmarks) + if (landmarks_per_face > 0) { + float *landmarks_ptr = reg_cls_ptr + 5; + for (size_t j = 0; j < landmarks_per_face * 2; j += 2) { + result->landmarks.emplace_back( + std::array{landmarks_ptr[j], landmarks_ptr[j + 1]}); + } + } + } + + if (result->boxes.size() == 0) { + return true; + } + + utils::NMS(result, nms_iou_threshold); + + // scale the boxes to the origin image shape + auto iter_out = im_info.find("output_shape"); + auto iter_ipt = im_info.find("input_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + if (!is_scale_up) { + scale = std::min(scale, 1.0f); + } + float pad_h = (out_h - ipt_h * scale) / 2.f; + float pad_w = (out_w - ipt_w * scale) / 2.f; + if (is_mini_pad) { + pad_h = static_cast(static_cast(pad_h) % stride); + pad_w = static_cast(static_cast(pad_w) % stride); + } + // scale and clip box + for (size_t i = 0; i < result->boxes.size(); ++i) { + result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); + result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); + result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); + result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); + result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); + result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); + result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); + result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); + } + // scale and clip landmarks + for (size_t i = 0; i < result->landmarks.size(); ++i) { + result->landmarks[i][0] = + std::max((result->landmarks[i][0] - pad_w) / scale, 0.0f); + result->landmarks[i][1] = + std::max((result->landmarks[i][1] - pad_h) / scale, 0.0f); + result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f); + result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f); + } + return true; +} + +bool YOLOv5Face::Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold, float nms_iou_threshold) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {static_cast(mat.Height()), + static_cast(mat.Width())}; + im_info["output_shape"] = {static_cast(mat.Height()), + 
static_cast(mat.Width())}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, + nms_iou_threshold)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.h new file mode 100755 index 0000000000..d17f679678 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face.h @@ -0,0 +1,102 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { +/*! @brief YOLOv5Face model object used when to load a YOLOv5Face model exported + * by YOLOv5Face. + */ +class ULTRAINFER_DECL YOLOv5Face : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./yolov5face.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + YOLOv5Face(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "yolov5-face"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \param[in] conf_threshold + * confidence threashold for postprocessing, default is 0.25 \param[in] + * nms_iou_threshold iou threashold for NMS, default is 0.5 \return true if + * the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, FaceDetectionResult *result, + float conf_threshold = 0.25, + float nms_iou_threshold = 0.5); + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default size = {640, 640} + */ + std::vector size; + // padding value, size should be the same as channels + + std::vector padding_value; + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad; + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + + bool is_no_pad; + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + + bool is_scale_up; + // padding stride, for is_mini_pad + int stride; + /*! @brief + Argument for image postprocessing step, setup the number of landmarks for + per face (if have), default 5 in official yolov5face note that, the outupt + tensor's shape must be: + (1,n,4+1+2*landmarks_per_face+1=box+obj+landmarks+cls), default 5 + */ + int landmarks_per_face; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, FaceDetectionResult *result, + const std::map> &im_info, + float conf_threshold, float nms_iou_threshold); + + bool IsDynamicInput() const { return is_dynamic_input_; } + + bool is_dynamic_input_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face_pybind.cc new file mode 100755 index 0000000000..aead752b28 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov5face_pybind.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv5Face(pybind11::module &m) { + pybind11::class_(m, + "YOLOv5Face") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::YOLOv5Face &self, pybind11::array &data, + float conf_threshold, float nms_iou_threshold) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); + return res; + }) + .def_readwrite("size", &vision::facedet::YOLOv5Face::size) + .def_readwrite("padding_value", + &vision::facedet::YOLOv5Face::padding_value) + .def_readwrite("is_mini_pad", &vision::facedet::YOLOv5Face::is_mini_pad) + .def_readwrite("is_no_pad", &vision::facedet::YOLOv5Face::is_no_pad) + .def_readwrite("is_scale_up", &vision::facedet::YOLOv5Face::is_scale_up) + .def_readwrite("stride", &vision::facedet::YOLOv5Face::stride) + .def_readwrite("landmarks_per_face", + &vision::facedet::YOLOv5Face::landmarks_per_face); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.cc new file mode 100755 index 0000000000..3f69724c11 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.cc @@ -0,0 +1,135 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +Yolov7FacePostprocessor::Yolov7FacePostprocessor() { + conf_threshold_ = 0.5; + nms_threshold_ = 0.45; + landmarks_per_face_ = 5; +} + +bool Yolov7FacePostprocessor::Run( + const std::vector &infer_result, + std::vector *results, + const std::vector>> &ims_info) { + int batch = infer_result[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + // must be setup landmarks_per_face before reserve + (*results)[bs].landmarks_per_face = landmarks_per_face_; + (*results)[bs].Reserve(infer_result[0].shape[1]); + if (infer_result[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float *data = + reinterpret_cast(infer_result[0].Data()) + + bs * infer_result[0].shape[1] * infer_result[0].shape[2]; + for (size_t i = 0; i < infer_result[0].shape[1]; ++i) { + int s = i * infer_result[0].shape[2]; + float confidence = data[s + 4]; + const float *reg_cls_ptr = data + s; + const float *class_score = data + s + 5; + confidence *= (*class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + float x = reg_cls_ptr[0]; + float y = reg_cls_ptr[1]; + float w = reg_cls_ptr[2]; + float h = reg_cls_ptr[3]; + + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + (x - w / 2.f), (y - h / 2.f), (x + w / 2.f), (y + h / 2.f)}); + (*results)[bs].scores.push_back(confidence); + + // decode landmarks (default 5 landmarks) + if (landmarks_per_face_ > 0) { + float *landmarks_ptr = const_cast(reg_cls_ptr + 6); + for (size_t j = 0; j < landmarks_per_face_ * 3; j += 3) { + (*results)[bs].landmarks.emplace_back( + std::array{landmarks_ptr[j], landmarks_ptr[j + 1]}); + } + } + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + // clip box + (*results)[bs].boxes[i][0] = + std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = + std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = + std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = + std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = + std::min((*results)[bs].boxes[i][0], ipt_w - 1.0f); + (*results)[bs].boxes[i][1] = + std::min((*results)[bs].boxes[i][1], ipt_h - 1.0f); + (*results)[bs].boxes[i][2] = + std::min((*results)[bs].boxes[i][2], ipt_w - 1.0f); + (*results)[bs].boxes[i][3] = + std::min((*results)[bs].boxes[i][3], ipt_h - 1.0f); + } + + // scale and clip landmarks + for (size_t i = 0; i < (*results)[bs].landmarks.size(); ++i) { + (*results)[bs].landmarks[i][0] = + std::max(((*results)[bs].landmarks[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].landmarks[i][1] = + std::max(((*results)[bs].landmarks[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].landmarks[i][0] = + std::min((*results)[bs].landmarks[i][0], ipt_w - 1.0f); + (*results)[bs].landmarks[i][1] = + std::min((*results)[bs].landmarks[i][1], ipt_h - 1.0f); + } + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h new file mode 100755 index 0000000000..868002fd5b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h @@ -0,0 +1,76 @@ +// 
Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL Yolov7FacePostprocessor { +public: + /*! @brief Postprocessor object for YOLOv7Face serials model. + */ + Yolov7FacePostprocessor(); + + /** \brief Process the result of runtime and fill to FaceDetectionResult + * structure + * + * \param[in] infer_result The inference result from runtime + * \param[in] results The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &infer_result, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.5 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.5 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.45 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.45 + float GetNMSThreshold() const { return nms_threshold_; } + + /// Set landmarks_per_face, default 5 + void SetLandmarksPerFace(const int &landmarks_per_face) { + landmarks_per_face_ = landmarks_per_face; + } + + /// Get landmarks_per_face, default 5 + int GetLandmarksPerFace() const { return landmarks_per_face_; } + +protected: + float conf_threshold_; + float nms_threshold_; + int landmarks_per_face_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.cc new file mode 100755 index 0000000000..1462344135 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.cc @@ -0,0 +1,123 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
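+// Usage note (an illustrative sketch, not part of the documented API): the
+// preprocessor turns a list of FDMat images into one batched NCHW float
+// tensor plus per-image shape info. Assuming `frame` is a cv::Mat decoded
+// elsewhere, a typical call looks like:
+//
+//   Yolov7FacePreprocessor preprocessor;
+//   std::vector<FDMat> mats = {WrapMat(frame)};
+//   std::vector<FDTensor> inputs;
+//   std::vector<std::map<std::string, std::array<float, 2>>> ims_info;
+//   preprocessor.Run(&mats, &inputs, &ims_info);  // inputs[0] holds the batch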
+ +#include "ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/vision/common/processors/mat.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +Yolov7FacePreprocessor::Yolov7FacePreprocessor() { + size_ = {640, 640}; + padding_color_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = false; + stride_ = 32; + max_wh_ = 7680.0; +} + +bool Yolov7FacePreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); i++) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +bool Yolov7FacePreprocessor::Preprocess( + FDMat *mat, FDTensor *output, + std::map> *im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // yolov7-face's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +void Yolov7FacePreprocessor::LetterBox(FDMat *mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + Resize::Run(mat, resize_w, resize_h); + + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_color_value_); + } +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h new file mode 100755 index 0000000000..245e097d88 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h @@ -0,0 +1,98 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL Yolov7FacePreprocessor { +public: + /** \brief Create a preprocessor instance for YOLOv7Face serials model + */ + Yolov7FacePreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \ret + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + + /// Set target size, tuple of (width, height), default size = {640, 640} + void SetSize(const std::vector &size) { size_ = size; } + + /// Get target size, tuple of (width, height), default size = {640, 640} + std::vector GetSize() const { return size_; } + + /// Set padding value, size should be the same as channels + void SetPaddingColorValue(const std::vector &padding_color_value) { + padding_color_value_ = padding_color_value; + } + + /// Get padding value, size should be the same as channels + std::vector GetPaddingColorValue() const { + return padding_color_value_; + } + + /// Set is_scale_up, if is_scale_up is false, the input image only + /// can be zoom out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { is_scale_up_ = is_scale_up; } + + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output, + std::map> *im_info); + + void LetterBox(FDMat *mat); + + // target size, tuple of (width, height), default size = {640, 640} + std::vector size_; + + // padding value, size should be the same as channels + std::vector padding_color_value_; + + // only pad to the minimum rectange which height and width is times of stride + bool is_mini_pad_; + + // while is_mini_pad = false and is_no_pad = true, + // will resize the image to the set size + bool is_no_pad_; + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; +}; + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.cc new file mode 100755 index 0000000000..5f5508614c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.cc @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +YOLOv7Face::YOLOv7Face(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool YOLOv7Face::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool YOLOv7Face::Predict(const cv::Mat &im, FaceDetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool YOLOv7Face::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + FDASSERT(images.size() == 1, "Only support batch = 1 now."); + std::vector>> ims_info; + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h new file mode 100755 index 0000000000..ae268feb6c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
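+// Usage sketch (file names below are placeholders, not assets shipped with
+// this patch): the model owns the pre/postprocessors declared in the
+// companion preprocessor/postprocessor headers, so a single Predict() call
+// covers letterbox, inference and NMS:
+//
+//   vision::facedet::YOLOv7Face model("yolov7-face.onnx");
+//   cv::Mat im = cv::imread("face.jpg");
+//   vision::FaceDetectionResult result;
+//   model.Predict(im, &result);  // error handling omitted for brevity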
+
+#pragma once
+#include "ultrainfer/ultrainfer_model.h"
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+#include "ultrainfer/vision/facedet/contrib/yolov7face/postprocessor.h"
+#include "ultrainfer/vision/facedet/contrib/yolov7face/preprocessor.h"
+
+namespace ultrainfer {
+
+namespace vision {
+
+namespace facedet {
+/*! @brief YOLOv7Face model object, used to load a face detection model
+ * exported by the YOLOv7Face project.
+ */
+class ULTRAINFER_DECL YOLOv7Face : public UltraInferModel {
+public:
+  /** \brief Set the path of the model file and the runtime configuration.
+   *
+   * \param[in] model_file Path of the model file, e.g. ./yolov7face.onnx
+   * \param[in] params_file Path of the parameter file, e.g. ppyoloe/model.pdiparams;
+   *            ignored if the model format is ONNX
+   * \param[in] custom_option RuntimeOption for inference; the default uses CPU
+   *            and chooses the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX
+   */
+  YOLOv7Face(const std::string &model_file, const std::string &params_file = "",
+             const RuntimeOption &custom_option = RuntimeOption(),
+             const ModelFormat &model_format = ModelFormat::ONNX);
+
+  std::string ModelName() { return "yolov7-face"; }
+
+  /** \brief Predict the face detection result for an input image
+   *
+   * \param[in] im The input image data, read by cv::imread(), a 3-D array
+   *            with layout HWC in BGR format
+   * \param[in] result The output detection result, written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(const cv::Mat &im, FaceDetectionResult *result);
+
+  /** \brief Predict the detection results for a batch of input images
+   *
+   * \param[in] images The input image list, each element read by cv::imread()
+   * \param[in] results The output detection result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector<cv::Mat> &images,
+                            std::vector<FaceDetectionResult> *results);
+
+  /// Get preprocessor reference of YOLOv7Face
+  virtual Yolov7FacePreprocessor &GetPreprocessor() { return preprocessor_; }
+
+  /// Get postprocessor reference of YOLOv7Face
+  virtual Yolov7FacePostprocessor &GetPostprocessor() { return postprocessor_; }
+
+protected:
+  bool Initialize();
+  Yolov7FacePreprocessor preprocessor_;
+  Yolov7FacePostprocessor postprocessor_;
+};
+
+} // namespace facedet
+
+} // namespace vision
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc
new file mode 100755
index 0000000000..bcbbb1f623
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc
@@ -0,0 +1,117 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
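+// Binding note (added for clarity): unlike the YOLOv5Face binding, this file
+// also exposes the two stages as standalone Python classes
+// (Yolov7FacePreprocessor and YOLOv7FacePostprocessor) next to the YOLOv7Face
+// model itself, so preprocessing and postprocessing can be driven
+// independently from Python around a custom runtime.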
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindYOLOv7Face(pybind11::module &m) { + pybind11::class_( + m, "Yolov7FacePreprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::facedet::Yolov7FacePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error("Failed to preprocess the input data " + "in PaddleClasPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }) + .def_property("size", &vision::facedet::Yolov7FacePreprocessor::GetSize, + &vision::facedet::Yolov7FacePreprocessor::SetSize) + .def_property( + "padding_color_value", + &vision::facedet::Yolov7FacePreprocessor::GetPaddingColorValue, + &vision::facedet::Yolov7FacePreprocessor::SetPaddingColorValue) + .def_property("is_scale_up", + &vision::facedet::Yolov7FacePreprocessor::GetScaleUp, + &vision::facedet::Yolov7FacePreprocessor::SetScaleUp); + + pybind11::class_( + m, "YOLOv7FacePostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::facedet::Yolov7FacePostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in Yolov7Postprocessor."); + } + return results; + }) + .def("run", + [](vision::facedet::Yolov7FacePostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in YOLOv7Postprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::facedet::Yolov7FacePostprocessor::GetConfThreshold, + &vision::facedet::Yolov7FacePostprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::facedet::Yolov7FacePostprocessor::GetNMSThreshold, + &vision::facedet::Yolov7FacePostprocessor::SetNMSThreshold) + .def_property( + "landmarks_per_face", + &vision::facedet::Yolov7FacePostprocessor::GetLandmarksPerFace, + &vision::facedet::Yolov7FacePostprocessor::SetLandmarksPerFace); + + pybind11::class_(m, + "YOLOv7Face") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::YOLOv7Face &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::facedet::YOLOv7Face &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::facedet::YOLOv7Face::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::facedet::YOLOv7Face::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/facedet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/facedet_pybind.cc new file mode 100755 index 0000000000..3e3fe95b43 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/facedet_pybind.cc @@ -0,0 +1,37 @@ 
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindRetinaFace(pybind11::module &m); +void BindUltraFace(pybind11::module &m); +void BindYOLOv5Face(pybind11::module &m); +void BindYOLOv7Face(pybind11::module &m); +void BindCenterFace(pybind11::module &m); +void BindBlazeFace(pybind11::module &m); +void BindSCRFD(pybind11::module &m); + +void BindFaceDet(pybind11::module &m) { + auto facedet_module = m.def_submodule("facedet", "Face detection models."); + BindRetinaFace(facedet_module); + BindUltraFace(facedet_module); + BindYOLOv5Face(facedet_module); + BindYOLOv7Face(facedet_module); + BindCenterFace(facedet_module); + BindBlazeFace(facedet_module); + BindSCRFD(facedet_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.cc b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.cc new file mode 100755 index 0000000000..459a0d85de --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.cc @@ -0,0 +1,94 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +BlazeFace::BlazeFace(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::OPENVINO, Backend::LITE, Backend::PDINFER}; + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool BlazeFace::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool BlazeFace::Predict(const cv::Mat &im, FaceDetectionResult *result) { + std::vector results; + if (!this->BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool BlazeFace::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + FDASSERT(images.size() == 1, "Only support batch = 1 now."); + std::vector>> ims_info; + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = "image"; + reused_input_tensors_[1].name = "scale_factor"; + reused_input_tensors_[2].name = "im_shape"; + + // Some models don't need scale_factor and im_shape as input + while (reused_input_tensors_.size() != NumInputsOfRuntime()) { + reused_input_tensors_.pop_back(); + } + + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + + return true; +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h new file mode 100755 index 0000000000..020d7ff47e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface.h @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h" +#include "ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { +/*! @brief BlazeFace model object used when to load a BlazeFace model exported + * by BlazeFace. + */ +class ULTRAINFER_DECL BlazeFace : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+   *
+   * \param[in] model_file Path of the model file, e.g. ./blazeface.onnx
+   * \param[in] params_file Path of the parameter file, e.g. ppyoloe/model.pdiparams;
+   *            ignored if the model format is ONNX
+   * \param[in] config_file Path of the configuration file for deployment,
+   *            e.g. resnet/infer_cfg.yml
+   * \param[in] custom_option RuntimeOption for inference; the default uses CPU
+   *            and chooses the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is PADDLE
+   */
+  BlazeFace(const std::string &model_file, const std::string &params_file = "",
+            const std::string &config_file = "",
+            const RuntimeOption &custom_option = RuntimeOption(),
+            const ModelFormat &model_format = ModelFormat::PADDLE);
+
+  std::string ModelName() { return "blaze-face"; }
+
+  /** \brief Predict the face detection result for an input image
+   *
+   * \param[in] im The input image data, read by cv::imread(), a 3-D array
+   *            with layout HWC in BGR format
+   * \param[in] result The output detection result, written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  bool Predict(const cv::Mat &im, FaceDetectionResult *result);
+
+  /** \brief Predict the detection results for a batch of input images
+   *
+   * \param[in] images The input image list, each element read by cv::imread()
+   * \param[in] results The output detection result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector<cv::Mat> &images,
+                            std::vector<FaceDetectionResult> *results);
+
+  /// Get preprocessor reference of BlazeFace
+  virtual BlazeFacePreprocessor &GetPreprocessor() { return preprocessor_; }
+
+  /// Get postprocessor reference of BlazeFace
+  virtual BlazeFacePostprocessor &GetPostprocessor() { return postprocessor_; }
+
+protected:
+  bool Initialize();
+  BlazeFacePreprocessor preprocessor_;
+  BlazeFacePostprocessor postprocessor_;
+};
+
+} // namespace facedet
+
+} // namespace vision
+
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface_pybind.cc
new file mode 100755
index 0000000000..5ae913a14b
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/blazeface_pybind.cc
@@ -0,0 +1,102 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
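+// Binding note (added for clarity): BlazeFace is bound together with its
+// preprocessor and postprocessor; conf_threshold and nms_threshold are
+// exported as properties of BlazeFacePostprocessor, while the model class
+// only exposes predict/batch_predict plus read-only accessors for the two
+// processing stages.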
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindBlazeFace(pybind11::module &m) { + pybind11::class_( + m, "BlazeFacePreprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::facedet::BlazeFacePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in BlazeFacePreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, ims_info); + }); + + pybind11::class_( + m, "BlazeFacePostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::facedet::BlazeFacePostprocessor &self, + std::vector &inputs, + const std::vector>> + &ims_info) { + std::vector results; + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in BlazeFacePostprocessor."); + } + return results; + }) + .def("run", + [](vision::facedet::BlazeFacePostprocessor &self, + std::vector &input_array, + const std::vector>> + &ims_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, ims_info)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in BlazePostprocessor."); + } + return results; + }) + .def_property("conf_threshold", + &vision::facedet::BlazeFacePostprocessor::GetConfThreshold, + &vision::facedet::BlazeFacePostprocessor::SetConfThreshold) + .def_property("nms_threshold", + &vision::facedet::BlazeFacePostprocessor::GetNMSThreshold, + &vision::facedet::BlazeFacePostprocessor::SetNMSThreshold); + + pybind11::class_(m, "BlazeFace") + .def(pybind11::init()) + .def("predict", + [](vision::facedet::BlazeFace &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::FaceDetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::facedet::BlazeFace &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::facedet::BlazeFace::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::facedet::BlazeFace::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.cc new file mode 100755 index 0000000000..1ce5e54e02 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h" +#include "ultrainfer/vision/detection/ppdet/multiclass_nms.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +BlazeFacePostprocessor::BlazeFacePostprocessor() { + conf_threshold_ = 0.5; + nms_threshold_ = 0.3; +} + +bool BlazeFacePostprocessor::Run( + const std::vector &tensors, + std::vector *results, + const std::vector>> &ims_info) { + // Get number of boxes for each input image + std::vector num_boxes(tensors[1].shape[0]); + int total_num_boxes = 0; + if (tensors[1].dtype == FDDataType::INT32) { + const auto *data = static_cast(tensors[1].CpuData()); + for (size_t i = 0; i < tensors[1].shape[0]; ++i) { + num_boxes[i] = static_cast(data[i]); + total_num_boxes += num_boxes[i]; + } + } else if (tensors[1].dtype == FDDataType::INT64) { + const auto *data = static_cast(tensors[1].CpuData()); + for (size_t i = 0; i < tensors[1].shape[0]; ++i) { + num_boxes[i] = static_cast(data[i]); + } + } + + // Special case for TensorRT, it has fixed output shape of NMS + // So there's invalid boxes in its' output boxes + int num_output_boxes = static_cast(tensors[0].Shape()[0]); + bool contain_invalid_boxes = false; + if (total_num_boxes != num_output_boxes) { + if (num_output_boxes % num_boxes.size() == 0) { + contain_invalid_boxes = true; + } else { + FDERROR << "Cannot handle the output data for this model, unexpected " + "situation." + << std::endl; + return false; + } + } + + // Get boxes for each input image + results->resize(num_boxes.size()); + + if (tensors[0].shape[0] == 0) { + // No detected boxes + return true; + } + + const auto *box_data = static_cast(tensors[0].CpuData()); + int offset = 0; + for (size_t i = 0; i < num_boxes.size(); ++i) { + const float *ptr = box_data + offset; + (*results)[i].Reserve(num_boxes[i]); + for (size_t j = 0; j < num_boxes[i]; ++j) { + if (ptr[j * 6 + 1] > conf_threshold_) { + (*results)[i].scores.push_back(ptr[j * 6 + 1]); + (*results)[i].boxes.emplace_back(std::array( + {ptr[j * 6 + 2], ptr[j * 6 + 3], ptr[j * 6 + 4], ptr[j * 6 + 5]})); + } + } + if (contain_invalid_boxes) { + offset += static_cast(num_output_boxes * 6 / num_boxes.size()); + } else { + offset += static_cast(num_boxes[i] * 6); + } + } + return true; +} + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h new file mode 100755 index 0000000000..fd8c78c83e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/postprocessor.h @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
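+// Note on the expected input (added for clarity): Run() consumes the tensors
+// produced by the exported PaddleDetection-style NMS head, where tensors[0]
+// stores one row of six floats per box (score at index 1, x1/y1/x2/y2 at
+// indices 2-5) and tensors[1] stores the number of valid boxes per image;
+// rows whose score does not exceed conf_threshold_ are dropped. A minimal,
+// illustrative configuration:
+//
+//   BlazeFacePostprocessor postprocessor;
+//   postprocessor.SetConfThreshold(0.6f);  // keep only higher-confidence faces
+//   postprocessor.SetNMSThreshold(0.3f);   // same as the default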
+ +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL BlazeFacePostprocessor { +public: + /*! @brief Postprocessor object for BlazeFace serials model. + */ + BlazeFacePostprocessor(); + + /** \brief Process the result of runtime and fill to FaceDetectionResult + * structure + * + * \param[in] infer_result The inference result from runtime + * \param[in] results The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool + Run(const std::vector &infer_result, + std::vector *results, + const std::vector>> &ims_info); + + /// Set conf_threshold, default 0.5 + void SetConfThreshold(const float &conf_threshold) { + conf_threshold_ = conf_threshold; + } + + /// Get conf_threshold, default 0.5 + float GetConfThreshold() const { return conf_threshold_; } + + /// Set nms_threshold, default 0.3 + void SetNMSThreshold(const float &nms_threshold) { + nms_threshold_ = nms_threshold; + } + + /// Get nms_threshold, default 0.3 + float GetNMSThreshold() const { return nms_threshold_; } + +protected: + float conf_threshold_; + float nms_threshold_; +}; + +} // namespace facedet +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.cc new file mode 100755 index 0000000000..f13ee64b52 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.cc @@ -0,0 +1,209 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h" +#include "ultrainfer/function/concat.h" +#include "ultrainfer/function/pad.h" +#include "ultrainfer/vision/common/processors/mat.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +BlazeFacePreprocessor::BlazeFacePreprocessor(const std::string &config_file) { + is_scale_ = false; + normalize_mean_ = {123, 117, 104}; + normalize_std_ = {127.502231, 127.502231, 127.502231}; + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create PaddleDetPreprocessor."); +} + +bool BlazeFacePreprocessor::Run( + std::vector *images, std::vector *outputs, + std::vector>> *ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." 
+ << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(3); + int batch = static_cast(images->size()); + // Allocate memory for scale_factor + (*outputs)[1].Resize({batch, 2}, FDDataType::FP32); + // Allocate memory for im_shape + (*outputs)[2].Resize({batch, 2}, FDDataType::FP32); + + std::vector max_hw({-1, -1}); + + auto *scale_factor_ptr = + reinterpret_cast((*outputs)[1].MutableData()); + auto *im_shape_ptr = reinterpret_cast((*outputs)[2].MutableData()); + + // Concat all the preprocessed data to a batch tensor + std::vector im_tensors(images->size()); + + for (size_t i = 0; i < images->size(); ++i) { + int origin_w = (*images)[i].Width(); + int origin_h = (*images)[i].Height(); + scale_factor_ptr[2 * i] = 1.0; + scale_factor_ptr[2 * i + 1] = 1.0; + + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(&((*images)[i]))) { + FDERROR << "Failed to processs image:" << i << " in " + << processors_[i]->Name() << "." << std::endl; + return false; + } + if (processors_[j]->Name().find("Resize") != std::string::npos) { + scale_factor_ptr[2 * i] = (*images)[i].Height() * 1.0 / origin_h; + scale_factor_ptr[2 * i + 1] = (*images)[i].Width() * 1.0 / origin_w; + } + } + + if ((*images)[i].Height() > max_hw[0]) { + max_hw[0] = (*images)[i].Height(); + } + if ((*images)[i].Width() > max_hw[1]) { + max_hw[1] = (*images)[i].Width(); + } + im_shape_ptr[2 * i] = max_hw[0]; + im_shape_ptr[2 * i + 1] = max_hw[1]; + + if ((*images)[i].Height() < max_hw[0] || (*images)[i].Width() < max_hw[1]) { + // if the size of image less than max_hw, pad to max_hw + FDTensor tensor; + (*images)[i].ShareWithTensor(&tensor); + function::Pad(tensor, &(im_tensors[i]), + {0, 0, max_hw[0] - (*images)[i].Height(), + max_hw[1] - (*images)[i].Width()}, + 0); + } else { + // No need pad + (*images)[i].ShareWithTensor(&(im_tensors[i])); + } + // Reshape to 1xCxHxW + im_tensors[i].ExpandDim(0); + } + + if (im_tensors.size() == 1) { + // If there's only 1 input, no need to concat + // skip memory copy + (*outputs)[0] = std::move(im_tensors[0]); + } else { + // Else concat the im tensor for each input image + // compose a batched input tensor + function::Concat(im_tensors, &((*outputs)[0]), 0); + } + + return true; +} + +bool BlazeFacePreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." 
<< std::endl; + return false; + } + + processors_.push_back(std::make_shared()); + + bool has_permute = false; + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "NormalizeImage") { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = true; + if (op["is_scale"]) { + is_scale = op["is_scale"].as(); + } + std::string norm_type = "mean_std"; + if (op["norm_type"]) { + norm_type = op["norm_type"].as(); + } + if (norm_type != "mean_std") { + std::fill(mean.begin(), mean.end(), 0.0); + std::fill(std.begin(), std.end(), 1.0); + } + processors_.push_back(std::make_shared(mean, std, is_scale)); + } else if (op_name == "Resize") { + bool keep_ratio = op["keep_ratio"].as(); + auto target_size = op["target_size"].as>(); + int interp = op["interp"].as(); + FDASSERT(target_size.size() == 2, + "Require size of target_size be 2, but now it's %lu.", + target_size.size()); + if (!keep_ratio) { + int width = target_size[1]; + int height = target_size[0]; + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, interp, false)); + } else { + int min_target_size = std::min(target_size[0], target_size[1]); + int max_target_size = std::max(target_size[0], target_size[1]); + std::vector max_size; + if (max_target_size > 0) { + max_size.push_back(max_target_size); + max_size.push_back(max_target_size); + } + processors_.push_back(std::make_shared( + min_target_size, interp, true, max_size)); + } + } else if (op_name == "Permute") { + // Do nothing, do permute as the last operation + has_permute = true; + continue; + } else if (op_name == "Pad") { + auto size = op["size"].as>(); + auto value = op["fill_value"].as>(); + processors_.push_back(std::make_shared("float")); + processors_.push_back( + std::make_shared(size[1], size[0], value)); + } else if (op_name == "PadStride") { + auto stride = op["stride"].as(); + processors_.push_back( + std::make_shared(stride, std::vector(3, 0))); + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + + if (has_permute) { + // permute = cast + HWC2CHW + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + } + + // Fusion will improve performance + FuseTransforms(&processors_); + + return true; +} + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h new file mode 100755 index 0000000000..afef5d31e7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/facedet/ppdet/blazeface/preprocessor.h @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
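+// Usage note (the config path below is a placeholder): the preprocessor is
+// normally constructed from the PaddleDetection-style inference config that
+// ships with the model; the YAML's Preprocess ops drive the pipeline built in
+// BuildPreprocessPipelineFromConfig(), and Run() then emits three tensors
+// (image, scale_factor and im_shape):
+//
+//   BlazeFacePreprocessor preprocessor("blazeface/infer_cfg.yml");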
+ +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/detection/ppdet/preprocessor.h" + +namespace ultrainfer { + +namespace vision { + +namespace facedet { + +class ULTRAINFER_DECL BlazeFacePreprocessor + : public ultrainfer::vision::detection::PaddleDetPreprocessor { +public: + /** \brief Create a preprocessor instance for BlazeFace serials model + */ + BlazeFacePreprocessor() = default; + + /** \brief Create a preprocessor instance for Blazeface serials model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * ppyoloe/infer_cfg.yml + */ + explicit BlazeFacePreprocessor(const std::string &config_file); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \ret + */ + bool Run(std::vector *images, std::vector *outputs, + std::vector>> *ims_info); + +private: + bool BuildPreprocessPipelineFromConfig(); + + // if is_scale_up is false, the input image only can be zoom out, + // the maximum resize scale cannot exceed 1.0 + bool is_scale_; + + std::vector normalize_mean_; + + std::vector normalize_std_; + + std::vector> processors_; + // read config file + std::string config_file_; +}; + +} // namespace facedet + +} // namespace vision + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.cc new file mode 100755 index 0000000000..7ebfa6f82c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/adaface/adaface.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +AdaFace::AdaFace(const std::string &model_file, const std::string ¶ms_file, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format) { + + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool AdaFace::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool AdaFace::Predict(const cv::Mat &im, FaceRecognitionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + if (!results.empty()) { + *result = std::move(results[0]); + } + return true; +} + +bool AdaFace::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + FDASSERT(images.size() == 1, "Only support batch = 1 now."); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.h new file mode 100755 index 0000000000..b88725a544 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface.h @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/faceid/contrib/adaface/postprocessor.h" +#include "ultrainfer/vision/faceid/contrib/adaface/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { +/*! @brief AdaFace model object used when to load a AdaFace model exported by + * AdaFace. + */ +class ULTRAINFER_DECL AdaFace : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./adaface.onnx + * \param[in] params_file Path of parameter file, e.g adaface/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + AdaFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "insightface_rec"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output + * FaceRecognitionResult will be writen to this structure \return true if the + * prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, FaceRecognitionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output FaceRecognitionResult list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *results); + + /// Get preprocessor reference of AdaFace + virtual AdaFacePreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of AdaFace + virtual AdaFacePostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + AdaFacePreprocessor preprocessor_; + AdaFacePostprocessor postprocessor_; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface_pybind.cc new file mode 100755 index 0000000000..838fabaa0c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/adaface_pybind.cc @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
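For reference, a minimal usage sketch of the AdaFace class declared above. It assumes an exported adaface.onnx, an aligned face crop face_0.jpg, and an umbrella header ultrainfer/vision.h that pulls in the vision API; all three names are placeholders rather than files provided by this patch.

#include <iostream>
#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h" // assumed umbrella header for the vision API

int main() {
  ultrainfer::RuntimeOption option; // default: CPU, first entry of valid_cpu_backends
  ultrainfer::vision::faceid::AdaFace model("adaface.onnx", "", option,
                                            ultrainfer::ModelFormat::ONNX);
  // Ask the postprocessor for unit-length embeddings (optional).
  bool l2_normalize = true;
  model.GetPostprocessor().SetL2Normalize(l2_normalize);

  cv::Mat face = cv::imread("face_0.jpg"); // an aligned face crop
  ultrainfer::vision::FaceRecognitionResult result;
  if (!model.Predict(face, &result)) {
    std::cerr << "AdaFace prediction failed." << std::endl;
    return -1;
  }
  std::cout << "embedding length: " << result.embedding.size() << std::endl;
  return 0;
}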
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindAdaFace(pybind11::module &m) { + pybind11::class_(m, + "AdaFacePreprocessor") + .def(pybind11::init()) + .def("run", + [](vision::faceid::AdaFacePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error("Failed to preprocess the input data " + "in AdaFacePreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def_property("permute", &vision::faceid::AdaFacePreprocessor::GetPermute, + &vision::faceid::AdaFacePreprocessor::SetPermute) + .def_property("alpha", &vision::faceid::AdaFacePreprocessor::GetAlpha, + &vision::faceid::AdaFacePreprocessor::SetAlpha) + .def_property("beta", &vision::faceid::AdaFacePreprocessor::GetBeta, + &vision::faceid::AdaFacePreprocessor::SetBeta) + .def_property("size", &vision::faceid::AdaFacePreprocessor::GetSize, + &vision::faceid::AdaFacePreprocessor::SetSize); + + pybind11::class_(m, + "AdaFacePostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::faceid::AdaFacePostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in AdaFacePostprocessor."); + } + return results; + }) + .def("run", + [](vision::faceid::AdaFacePostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error("Failed to postprocess the runtime " + "result in AdaFacePostprocessor."); + } + return results; + }) + .def_property("l2_normalize", + &vision::faceid::AdaFacePostprocessor::GetL2Normalize, + &vision::faceid::AdaFacePostprocessor::SetL2Normalize); + + pybind11::class_(m, "AdaFace") + .def(pybind11::init()) + .def("predict", + [](vision::faceid::AdaFace &self, pybind11::array &data) { + cv::Mat im = PyArrayToCvMat(data); + vision::FaceRecognitionResult result; + self.Predict(im, &result); + return result; + }) + .def("batch_predict", + [](vision::faceid::AdaFace &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::faceid::AdaFace::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::faceid::AdaFace::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.cc new file mode 100755 index 0000000000..5a559f4bb9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/adaface/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +AdaFacePostprocessor::AdaFacePostprocessor() { l2_normalize_ = false; } + +bool AdaFacePostprocessor::Run(std::vector &infer_result, + std::vector *results) { + if (infer_result[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + if (infer_result.size() != 1) { + FDERROR << "The default number of output tensor " + "must be 1 according to insightface." + << std::endl; + } + int batch = infer_result[0].shape[0]; + results->resize(batch); + for (size_t bs = 0; bs < batch; ++bs) { + FDTensor &embedding_tensor = infer_result.at(bs); + FDASSERT((embedding_tensor.shape[0] == 1), "Only support batch = 1 now."); + if (embedding_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + (*results)[bs].Clear(); + (*results)[bs].Resize(embedding_tensor.Numel()); + + // Copy the raw embedding vector directly without L2 normalize + // post process. Let the user decide whether to normalize or not. + // Will call utils::L2Normlize() method to perform L2 + // normalize if l2_normalize was set as 'true'. + std::memcpy((*results)[bs].embedding.data(), embedding_tensor.Data(), + embedding_tensor.Nbytes()); + if (l2_normalize_) { + auto norm_embedding = utils::L2Normalize((*results)[bs].embedding); + std::memcpy((*results)[bs].embedding.data(), norm_embedding.data(), + embedding_tensor.Nbytes()); + } + } + return true; +} + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.h new file mode 100755 index 0000000000..c5c38b661f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/postprocessor.h @@ -0,0 +1,51 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace faceid { +/*! @brief Postprocessor object for AdaFace serials model. 
+ */ +class ULTRAINFER_DECL AdaFacePostprocessor { +public: + /** \brief Create a postprocessor instance for AdaFace serials model + */ + AdaFacePostprocessor(); + + /** \brief Process the result of runtime and fill to FaceRecognitionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of FaceRecognitionResult + * \return true if the postprocess successed, otherwise false + */ + bool Run(std::vector &infer_result, + std::vector *results); + + void SetL2Normalize(bool &l2_normalize) { l2_normalize_ = l2_normalize; } + + bool GetL2Normalize() { return l2_normalize_; } + +private: + bool l2_normalize_; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.cc new file mode 100755 index 0000000000..092bdb4873 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.cc @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/adaface/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +AdaFacePreprocessor::AdaFacePreprocessor() { + // parameters for preprocess + size_ = {112, 112}; + alpha_ = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + beta_ = {-1.f, -1.f, -1.f}; // RGB + permute_ = true; +} + +bool AdaFacePreprocessor::Preprocess(FDMat *mat, FDTensor *output) { + // face recognition model's preprocess steps in insightface + // reference: insightface/recognition/arcface_torch/inference.py + // 1. Resize + // 2. BGR2RGB + // 3. Convert(opencv style) or Normalize + // 4. HWC2CHW + int resize_w = size_[0]; + int resize_h = size_[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (permute_) { + BGR2RGB::Run(mat); + } + + Convert::Run(mat, alpha_, beta_); + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool AdaFacePreprocessor::Run(std::vector *images, + std::vector *outputs) { + if (images->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + FDASSERT(images->size() == 1, "Only support batch = 1 now."); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i])) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + } + (*outputs)[0] = std::move(tensors[0]); + return true; +} +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.h new file mode 100755 index 0000000000..d4e46bdace --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/adaface/preprocessor.h @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace faceid { +/*! @brief Preprocessor object for AdaFace serials model. + */ +class ULTRAINFER_DECL AdaFacePreprocessor { +public: + /** \brief Create a preprocessor instance for AdaFace serials model + */ + AdaFacePreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs); + + /// Get Size + std::vector GetSize() { return size_; } + + /// Set size. + void SetSize(std::vector &size) { size_ = size; } + + /// Get alpha + std::vector GetAlpha() { return alpha_; } + + /// Set alpha. + void SetAlpha(std::vector &alpha) { alpha_ = alpha; } + + /// Get beta + std::vector GetBeta() { return beta_; } + + /// Set beta. + void SetBeta(std::vector &beta) { beta_ = beta; } + + bool GetPermute() { return permute_; } + + /// Set permute. + void SetPermute(bool permute) { permute_ = permute; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output); + // Argument for image preprocessing step, tuple of (width, height), + // decide the target size after resize, default (112, 112) + std::vector size_; + // Argument for image preprocessing step, alpha values for normalization, + // default alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + std::vector alpha_; + // Argument for image preprocessing step, beta values for normalization, + // default beta = {-1.f, -1.f, -1.f} + std::vector beta_; + // Argument for image preprocessing step, whether to swap the B and R channel, + // such as BGR->RGB, default true. + bool permute_; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.cc new file mode 100755 index 0000000000..2ab0be0dba --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
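The AdaFace preprocessor defined above resizes to 112x112, swaps BGR to RGB, and applies v * alpha + beta per channel, so the default alpha = 1/127.5 and beta = -1 map the pixel range [0, 255] to [-1, 1]. Below is a standalone sketch of driving it directly; the umbrella header ultrainfer/vision.h, the WrapMat overload for std::vector<cv::Mat>, and the image path are assumptions, not part of this patch.

#include <string>
#include <vector>
#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h" // assumed umbrella header

// Illustrative sketch: run AdaFacePreprocessor on one decoded image and
// produce the 1x3x112x112 float tensor that would be fed to the runtime.
bool PreprocessOneFace(const std::string &image_path,
                       std::vector<ultrainfer::FDTensor> *inputs) {
  ultrainfer::vision::faceid::AdaFacePreprocessor preprocessor;
  // Restating the defaults to make the v * alpha + beta convention explicit.
  std::vector<float> alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
  std::vector<float> beta = {-1.f, -1.f, -1.f};
  preprocessor.SetAlpha(alpha);
  preprocessor.SetBeta(beta);

  cv::Mat im = cv::imread(image_path);
  std::vector<ultrainfer::vision::FDMat> mats =
      ultrainfer::vision::WrapMat({im});
  return preprocessor.Run(&mats, inputs); // batch = 1 only, per the FDASSERT above
}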
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/insightface/base.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +InsightFaceRecognitionBase::InsightFaceRecognitionBase( + const std::string &model_file, const std::string ¶ms_file, + const ultrainfer::RuntimeOption &custom_option, + const ultrainfer::ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + } + valid_rknpu_backends = {Backend::RKNPU2}; + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; +} + +bool InsightFaceRecognitionBase::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool InsightFaceRecognitionBase::Predict(const cv::Mat &im, + FaceRecognitionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool InsightFaceRecognitionBase::BatchPredict( + const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + FDASSERT(images.size() == 1, "Only support batch = 1 now."); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.h new file mode 100755 index 0000000000..10d24c69eb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/base.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/faceid/contrib/insightface/postprocessor.h" +#include "ultrainfer/vision/faceid/contrib/insightface/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { +/*! @brief InsightFaceRecognition model object used when to load a + * InsightFaceRecognition model exported by InsightFaceRecognition. + */ +class ULTRAINFER_DECL InsightFaceRecognitionBase : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./arcface.onnx + * \param[in] params_file Path of parameter file, e.g arcface/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + InsightFaceRecognitionBase( + const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "insightface_rec"; } + + /** \brief Predict the detection result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output + * FaceRecognitionResult will be writen to this structure \return true if the + * prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, FaceRecognitionResult *result); + + /** \brief Predict the detection results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output FaceRecognitionResult list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *results); + + /// Get preprocessor reference of InsightFaceRecognition + virtual InsightFaceRecognitionPreprocessor &GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference of InsightFaceRecognition + virtual InsightFaceRecognitionPostprocessor &GetPostprocessor() { + return postprocessor_; + } + +protected: + bool Initialize(); + InsightFaceRecognitionPreprocessor preprocessor_; + InsightFaceRecognitionPostprocessor postprocessor_; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/insightface_pybind.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/insightface_pybind.cc new file mode 100755 index 0000000000..d05faf9585 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/insightface_pybind.cc @@ -0,0 +1,138 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
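Both the AdaFace postprocessor above and the InsightFace one later in this patch copy the raw embedding and, when l2_normalize is set, replace it with a unit-length version via utils::L2Normalize. For readers unfamiliar with that step, here is an equivalent standalone sketch of the math (not the library call itself):

#include <cmath>
#include <vector>

// Divide every element of the embedding by its Euclidean norm so the result
// has unit length; cosine similarity then reduces to a plain dot product.
// The small epsilon guards against an all-zero embedding.
void L2NormalizeInPlace(std::vector<float> *embedding) {
  float squared_sum = 0.f;
  for (float v : *embedding) {
    squared_sum += v * v;
  }
  const float norm = std::sqrt(squared_sum) + 1e-12f;
  for (float &v : *embedding) {
    v /= norm;
  }
}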
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindInsightFace(pybind11::module &m) { + pybind11::class_( + m, "InsightFaceRecognitionPreprocessor") + .def(pybind11::init()) + .def("run", + [](vision::faceid::InsightFaceRecognitionPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "InsightFaceRecognitionPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def( + "disable_normalize", + &vision::faceid::InsightFaceRecognitionPreprocessor::DisableNormalize) + .def("disable_permute", + &vision::faceid::InsightFaceRecognitionPreprocessor::DisablePermute) + .def_property( + "alpha", + &vision::faceid::InsightFaceRecognitionPreprocessor::GetAlpha, + &vision::faceid::InsightFaceRecognitionPreprocessor::SetAlpha) + .def_property( + "beta", &vision::faceid::InsightFaceRecognitionPreprocessor::GetBeta, + &vision::faceid::InsightFaceRecognitionPreprocessor::SetBeta) + .def_property( + "size", &vision::faceid::InsightFaceRecognitionPreprocessor::GetSize, + &vision::faceid::InsightFaceRecognitionPreprocessor::SetSize); + + pybind11::class_( + m, "InsightFaceRecognitionPostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::faceid::InsightFaceRecognitionPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "InsightFaceRecognitionPostprocessor."); + } + return results; + }) + .def("run", + [](vision::faceid::InsightFaceRecognitionPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "InsightFaceRecognitionPostprocessor."); + } + return results; + }) + .def_property( + "l2_normalize", + &vision::faceid::InsightFaceRecognitionPostprocessor::GetL2Normalize, + &vision::faceid::InsightFaceRecognitionPostprocessor::SetL2Normalize); + + pybind11::class_( + m, "InsightFaceRecognitionBase") + .def(pybind11::init()) + .def("predict", + [](vision::faceid::InsightFaceRecognitionBase &self, + pybind11::array &data) { + cv::Mat im = PyArrayToCvMat(data); + vision::FaceRecognitionResult result; + self.Predict(im, &result); + return result; + }) + .def("batch_predict", + [](vision::faceid::InsightFaceRecognitionBase &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + 
.def_property_readonly( + "preprocessor", + &vision::faceid::InsightFaceRecognitionBase::GetPreprocessor) + .def_property_readonly( + "postprocessor", + &vision::faceid::InsightFaceRecognitionBase::GetPostprocessor); + + pybind11::class_(m, "ArcFace") + .def(pybind11::init()); + + pybind11::class_(m, "CosFace") + .def(pybind11::init()); + + pybind11::class_(m, "PartialFC") + .def(pybind11::init()); + + pybind11::class_(m, "VPL") + .def(pybind11::init()); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/model.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/model.h new file mode 100755 index 0000000000..1dec9259d0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/model.h @@ -0,0 +1,154 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/faceid/contrib/insightface/base.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { +class ULTRAINFER_DECL ArcFace : public InsightFaceRecognitionBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g ArcFace/model.pdmodel + * \param[in] params_file Path of parameter file, e.g ArcFace/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in `valid_cpu_backends` \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + ArcFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX) + : InsightFaceRecognitionBase(model_file, params_file, custom_option, + model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + } + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "ArcFace"; } +}; + +class ULTRAINFER_DECL CosFace : public InsightFaceRecognitionBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g CosFace/model.pdmodel + * \param[in] params_file Path of parameter file, e.g CosFace/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in 
`valid_cpu_backends` \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + CosFace(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX) + : InsightFaceRecognitionBase(model_file, params_file, custom_option, + model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + } + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "CosFace"; } +}; +class ULTRAINFER_DECL PartialFC : public InsightFaceRecognitionBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g PartialFC/model.pdmodel + * \param[in] params_file Path of parameter file, e.g + * PartialFC/model.pdiparams, if the model format is ONNX, this parameter will + * be ignored \param[in] custom_option RuntimeOption for inference, the + * default will use cpu, and choose the backend defined in + * `valid_cpu_backends` \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + PartialFC(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX) + : InsightFaceRecognitionBase(model_file, params_file, custom_option, + model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + } + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PartialFC"; } +}; +class ULTRAINFER_DECL VPL : public InsightFaceRecognitionBase { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g VPL/model.pdmodel + * \param[in] params_file Path of parameter file, e.g VPL/model.pdiparams, if + * the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in `valid_cpu_backends` \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + VPL(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX) + : InsightFaceRecognitionBase(model_file, params_file, custom_option, + model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; + } else { + valid_cpu_backends = {Backend::PDINFER, 
Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + } + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "VPL"; } +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.cc new file mode 100755 index 0000000000..8fae91da57 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/insightface/postprocessor.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +InsightFaceRecognitionPostprocessor::InsightFaceRecognitionPostprocessor() { + l2_normalize_ = false; +} + +bool InsightFaceRecognitionPostprocessor::Run( + std::vector &infer_result, + std::vector *results) { + if (infer_result[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + if (infer_result.size() != 1) { + FDERROR << "The default number of output tensor " + "must be 1 according to insightface." + << std::endl; + } + int batch = infer_result[0].shape[0]; + results->resize(batch); + for (size_t bs = 0; bs < batch; ++bs) { + FDTensor &embedding_tensor = infer_result.at(bs); + FDASSERT((embedding_tensor.shape[0] == 1), "Only support batch = 1 now."); + if (embedding_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + (*results)[bs].Clear(); + (*results)[bs].Resize(embedding_tensor.Numel()); + + // Copy the raw embedding vector directly without L2 normalize + // post process. Let the user decide whether to normalize or not. + // Will call utils::L2Normlize() method to perform L2 + // normalize if l2_normalize was set as 'true'. + std::memcpy((*results)[bs].embedding.data(), embedding_tensor.Data(), + embedding_tensor.Nbytes()); + if (l2_normalize_) { + auto norm_embedding = utils::L2Normalize((*results)[bs].embedding); + std::memcpy((*results)[bs].embedding.data(), norm_embedding.data(), + embedding_tensor.Nbytes()); + } + } + return true; +} + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.h new file mode 100755 index 0000000000..00b310971a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/postprocessor.h @@ -0,0 +1,52 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
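With the ArcFace / CosFace / PartialFC / VPL wrappers above, comparing two faces reduces to extracting two embeddings and measuring their cosine similarity. A sketch under the same assumptions as earlier (placeholder paths, assumed ultrainfer/vision.h umbrella header); the similarity is computed by hand here rather than through a library helper.

#include <cmath>
#include <iostream>
#include <vector>
#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h" // assumed umbrella header

static float CosineSimilarity(const std::vector<float> &a,
                              const std::vector<float> &b) {
  float dot = 0.f, norm_a = 0.f, norm_b = 0.f;
  for (size_t i = 0; i < a.size(); ++i) {
    dot += a[i] * b[i];
    norm_a += a[i] * a[i];
    norm_b += b[i] * b[i];
  }
  return dot / (std::sqrt(norm_a) * std::sqrt(norm_b) + 1e-12f);
}

int main() {
  ultrainfer::vision::faceid::ArcFace model("arcface.onnx"); // placeholder path
  cv::Mat face0 = cv::imread("face_0.jpg");
  cv::Mat face1 = cv::imread("face_1.jpg");
  ultrainfer::vision::FaceRecognitionResult r0, r1;
  if (!model.Predict(face0, &r0) || !model.Predict(face1, &r1)) {
    return -1;
  }
  std::cout << "cosine similarity: "
            << CosineSimilarity(r0.embedding, r1.embedding) << std::endl;
  return 0;
}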
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace faceid { +/*! @brief Postprocessor object for InsightFaceRecognition serials model. + */ +class ULTRAINFER_DECL InsightFaceRecognitionPostprocessor { +public: + /** \brief Create a postprocessor instance for InsightFaceRecognition serials + * model + */ + InsightFaceRecognitionPostprocessor(); + + /** \brief Process the result of runtime and fill to FaceRecognitionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of FaceRecognitionResult + * \return true if the postprocess successed, otherwise false + */ + bool Run(std::vector &infer_result, + std::vector *results); + + void SetL2Normalize(bool &l2_normalize) { l2_normalize_ = l2_normalize; } + + bool GetL2Normalize() { return l2_normalize_; } + +private: + bool l2_normalize_; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.cc new file mode 100755 index 0000000000..d5364c9a15 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/faceid/contrib/insightface/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace faceid { + +InsightFaceRecognitionPreprocessor::InsightFaceRecognitionPreprocessor() { + // parameters for preprocess + size_ = {112, 112}; + alpha_ = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + beta_ = {-1.f, -1.f, -1.f}; // RGB +} + +bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat *mat, + FDTensor *output) { + // face recognition model's preprocess steps in insightface + // reference: insightface/recognition/arcface_torch/inference.py + // 1. Resize + // 2. BGR2RGB + // 3. Convert(opencv style) or Normalize + // 4. 
HWC2CHW + int resize_w = size_[0]; + int resize_h = size_[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + if (!disable_permute_) { + BGR2RGB::Run(mat); + } + + if (!disable_normalize_) { + Convert::Run(mat, alpha_, beta_); + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + } + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool InsightFaceRecognitionPreprocessor::Run(std::vector *images, + std::vector *outputs) { + if (images->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + FDASSERT(images->size() == 1, "Only support batch = 1 now."); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i])) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + } + (*outputs)[0] = std::move(tensors[0]); + return true; +} +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.h new file mode 100755 index 0000000000..9986222432 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/contrib/insightface/preprocessor.h @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace faceid { +/*! @brief Preprocessor object for InsightFaceRecognition serials model. + */ +class ULTRAINFER_DECL InsightFaceRecognitionPreprocessor { +public: + /** \brief Create a preprocessor instance for InsightFaceRecognition serials + * model + */ + InsightFaceRecognitionPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs); + + /// Get Size + std::vector GetSize() { return size_; } + + /// Set size. + void SetSize(std::vector &size) { size_ = size; } + + /// Get alpha + std::vector GetAlpha() { return alpha_; } + + /// Set alpha. + void SetAlpha(std::vector &alpha) { alpha_ = alpha; } + + /// Get beta + std::vector GetBeta() { return beta_; } + + /// Set beta. + void SetBeta(std::vector &beta) { beta_ = beta; } + + /// This function will disable normalize and hwc2chw in preprocessing step. 
+ void DisableNormalize() { disable_normalize_ = true; } + + /// This function will disable hwc2chw in preprocessing step. + void DisablePermute() { disable_permute_ = true; } + +protected: + bool Preprocess(FDMat *mat, FDTensor *output); + // Argument for image preprocessing step, tuple of (width, height), + // decide the target size after resize, default (112, 112) + std::vector size_; + // Argument for image preprocessing step, alpha values for normalization, + // default alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + std::vector alpha_; + // Argument for image preprocessing step, beta values for normalization, + // default beta = {-1.f, -1.f, -1.f} + std::vector beta_; + // for recording the switch of normalize + bool disable_normalize_ = false; + // Argument for image preprocessing step, whether to swap the B and R channel, + // such as BGR->RGB, default true. + bool disable_permute_ = false; +}; + +} // namespace faceid +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/faceid/faceid_pybind.cc b/libs/ultrainfer/ultrainfer/vision/faceid/faceid_pybind.cc new file mode 100755 index 0000000000..970040baf3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/faceid/faceid_pybind.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindInsightFace(pybind11::module &m); +void BindAdaFace(pybind11::module &m); +void BindFaceId(pybind11::module &m) { + auto faceid_module = m.def_submodule("faceid", "Face recognition models."); + BindInsightFace(faceid_module); + BindAdaFace(faceid_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.cc b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.cc new file mode 100755 index 0000000000..0441b9a20f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
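One note on the DisableNormalize / DisablePermute switches in the InsightFaceRecognitionPreprocessor above: per the implementation, the first skips the Convert / HWC2CHW / Cast steps and the second skips the BGR-to-RGB swap, which matters when those steps are already folded into a converted model (for example an RKNN export). A sketch, with a placeholder model name and the same assumed umbrella header as before:

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h" // assumed umbrella header

int main() {
  // Placeholder: a model whose graph already performs color conversion,
  // normalization and the HWC->CHW transpose.
  ultrainfer::vision::faceid::ArcFace model("arcface_fused_preprocess.onnx");
  model.GetPreprocessor().DisableNormalize();
  model.GetPreprocessor().DisablePermute();

  cv::Mat face = cv::imread("face_0.jpg");
  ultrainfer::vision::FaceRecognitionResult result;
  if (!model.Predict(face, &result)) {
    return -1;
  }
  return 0;
}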
+ +#include "ultrainfer/vision/generation/contrib/animegan.h" +#include "ultrainfer/function/functions.h" + +namespace ultrainfer { +namespace vision { +namespace generation { + +AnimeGAN::AnimeGAN(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool AnimeGAN::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool AnimeGAN::Predict(cv::Mat &img, cv::Mat *result) { + std::vector results; + if (!BatchPredict({img}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool AnimeGAN::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + std::vector processed_data(1); + if (!preprocessor_.Run(fd_images, &(processed_data))) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + std::vector infer_result(1); + processed_data[0].name = InputInfoOfRuntime(0).name; + + if (!Infer(processed_data, &infer_result)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + if (!postprocessor_.Run(infer_result, results)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +} // namespace generation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.h b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.h new file mode 100755 index 0000000000..3135fe72b8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan.h @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/generation/contrib/postprocessor.h" +#include "ultrainfer/vision/generation/contrib/preprocessor.h" + +namespace ultrainfer { + +namespace vision { + +namespace generation { +/*! @brief AnimeGAN model object is used when load a AnimeGAN model. + */ +class ULTRAINFER_DECL AnimeGAN : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g ./model.pdmodel + * \param[in] params_file Path of parameter file, e.g ./model.pdiparams, if + * the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is PADDLE format + */ + AnimeGAN(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "styletransfer/animegan"; } + + /** \brief Predict the style transfer result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output style transfer + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + bool Predict(cv::Mat &img, cv::Mat *result); + + /** \brief Predict the style transfer result for a batch of input images + * + * \param[in] images The list of input images, each element comes from + * cv::imread(), is a 3-D array with layout HWC, BGR format \param[in] results + * The list of output style transfer results will be writen to this structure + * \return true if the batch prediction successed, otherwise false + */ + bool BatchPredict(const std::vector &images, + std::vector *results); + + // Get preprocessor reference of AnimeGAN + AnimeGANPreprocessor &GetPreprocessor() { return preprocessor_; } + + // Get postprocessor reference of AnimeGAN + AnimeGANPostprocessor &GetPostprocessor() { return postprocessor_; } + +private: + bool Initialize(); + + AnimeGANPreprocessor preprocessor_; + AnimeGANPostprocessor postprocessor_; +}; + +} // namespace generation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan_pybind.cc b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan_pybind.cc new file mode 100755 index 0000000000..40fad3b990 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/animegan_pybind.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
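A minimal usage sketch for the AnimeGAN class declared above, assuming a Paddle-format export (model.pdmodel / model.pdiparams) and the same assumed umbrella header; all paths are placeholders.

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h" // assumed umbrella header

int main() {
  ultrainfer::vision::generation::AnimeGAN model("animegan/model.pdmodel",
                                                 "animegan/model.pdiparams");
  cv::Mat input = cv::imread("scenery.jpg");
  cv::Mat stylized;
  if (!model.Predict(input, &stylized)) {
    return -1;
  }
  // The postprocessor returns an 8-bit BGR image, ready for imwrite.
  cv::imwrite("scenery_anime.jpg", stylized);
  return 0;
}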
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindAnimeGAN(pybind11::module &m) { + pybind11::class_(m, "AnimeGAN") + .def(pybind11::init()) + .def("predict", + [](vision::generation::AnimeGAN &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + cv::Mat res; + self.Predict(mat, &res); + auto ret = pybind11::array_t( + {res.rows, res.cols, res.channels()}, res.data); + return ret; + }) + .def("batch_predict", + [](vision::generation::AnimeGAN &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + std::vector> ret; + for (size_t i = 0; i < results.size(); ++i) { + ret.push_back(pybind11::array_t( + {results[i].rows, results[i].cols, results[i].channels()}, + results[i].data)); + } + return ret; + }) + .def_property_readonly("preprocessor", + &vision::generation::AnimeGAN::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::generation::AnimeGAN::GetPostprocessor); + + pybind11::class_( + m, "AnimeGANPreprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::generation::AnimeGANPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in PaddleClasPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }); + pybind11::class_( + m, "AnimeGANPostprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::generation::AnimeGANPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error("Failed to postprocess the runtime result " + "in YOLOv5Postprocessor."); + } + return results; + }); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.cc new file mode 100755 index 0000000000..7d94f15f09 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/generation/contrib/postprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace generation { + +bool AnimeGANPostprocessor::Run(std::vector &infer_results, + std::vector *results) { + // 1. Reverse normalization + // 2. 
RGB2BGR + FDTensor &output_tensor = infer_results.at(0); + std::vector shape = output_tensor.Shape(); // n, h, w, c + int size = shape[1] * shape[2] * shape[3]; + results->resize(shape[0]); + float *infer_result_data = reinterpret_cast(output_tensor.Data()); + for (size_t i = 0; i < results->size(); ++i) { + Mat result_mat = Mat::Create(shape[1], shape[2], 3, FDDataType::FP32, + infer_result_data + i * size); + std::vector mean{127.5f, 127.5f, 127.5f}; + std::vector std{127.5f, 127.5f, 127.5f}; + Convert::Run(&result_mat, mean, std); + // tmp data type is float[0-1.0],convert to uint type + auto temp = result_mat.GetOpenCVMat(); + cv::Mat res = cv::Mat::zeros(temp->size(), CV_8UC3); + temp->convertTo(res, CV_8UC3, 1); + Mat fd_image = WrapMat(res); + BGR2RGB::Run(&fd_image); + res = *(fd_image.GetOpenCVMat()); + res.copyTo(results->at(i)); + } + return true; +} + +} // namespace generation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.h new file mode 100755 index 0000000000..e51718874a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/postprocessor.h @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/function/functions.h" +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { + +namespace generation { +/*! @brief Postprocessor object for AnimeGAN serials model. + */ +class ULTRAINFER_DECL AnimeGANPostprocessor { +public: + /** \brief Create a postprocessor instance for AnimeGAN serials model + */ + AnimeGANPostprocessor() {} + + /** \brief Process the result of runtime + * + * \param[in] infer_results The inference results from runtime + * \param[in] results The output results of style transfer + * \return true if the postprocess successed, otherwise false + */ + bool Run(std::vector &infer_results, std::vector *results); +}; + +} // namespace generation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.cc new file mode 100755 index 0000000000..367f266a6c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/vision/generation/contrib/preprocessor.h"
+
+namespace ultrainfer {
+namespace vision {
+namespace generation {
+
+bool AnimeGANPreprocessor::Run(std::vector<FDMat> &images,
+                               std::vector<FDTensor> *outputs) {
+  // 1. BGR2RGB
+  // 2. Convert(opencv style) or Normalize
+  for (size_t i = 0; i < images.size(); ++i) {
+    auto ret = BGR2RGB::Run(&images[i]);
+    if (!ret) {
+      FDERROR << "Failed to process image:" << i << " in "
+              << "BGR2RGB"
+              << "." << std::endl;
+      return false;
+    }
+    ret = Cast::Run(&images[i], "float");
+    if (!ret) {
+      FDERROR << "Failed to process image:" << i << " in "
+              << "Cast"
+              << "." << std::endl;
+      return false;
+    }
+    std::vector<float> mean{1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
+    std::vector<float> std{-1.f, -1.f, -1.f};
+    ret = Convert::Run(&images[i], mean, std);
+    if (!ret) {
+      FDERROR << "Failed to process image:" << i << " in "
+              << "Convert"
+              << "." << std::endl;
+      return false;
+    }
+  }
+  outputs->resize(1);
+  // Concat all the preprocessed data to a batch tensor
+  std::vector<FDTensor> tensors(images.size());
+  for (size_t i = 0; i < images.size(); ++i) {
+    images[i].ShareWithTensor(&(tensors[i]));
+    tensors[i].ExpandDim(0);
+  }
+  if (tensors.size() == 1) {
+    (*outputs)[0] = std::move(tensors[0]);
+  } else {
+    function::Concat(tensors, &((*outputs)[0]), 0);
+  }
+  return true;
+}
+
+} // namespace generation
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.h
new file mode 100755
index 0000000000..6ba0abcbd1
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/generation/contrib/preprocessor.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/function/functions.h"
+#include "ultrainfer/vision/common/processors/transform.h"
+
+namespace ultrainfer {
+namespace vision {
+
+namespace generation {
+/*! @brief Preprocessor object for the AnimeGAN series of models.
+ */
+class ULTRAINFER_DECL AnimeGANPreprocessor {
+public:
+  /** \brief Create a preprocessor instance for the AnimeGAN series of models
+   */
+  AnimeGANPreprocessor() {}
+
+  /** \brief Process the input image and prepare input tensors for runtime
+   *
+   * \param[in] images The input image data list, all the elements are returned
+   * wrapped by FDMat.
\param[in] output The output tensors which will feed in + * runtime \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector &images, std::vector *output); +}; + +} // namespace generation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/generation/generation_pybind.cc b/libs/ultrainfer/ultrainfer/vision/generation/generation_pybind.cc new file mode 100755 index 0000000000..0ef28f028b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/generation/generation_pybind.cc @@ -0,0 +1,26 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindAnimeGAN(pybind11::module &m); + +void BindGeneration(pybind11::module &m) { + auto generation_module = + m.def_submodule("generation", "image generation submodule"); + BindAnimeGAN(generation_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.cc b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.cc new file mode 100755 index 0000000000..7d6187a0fd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.cc @@ -0,0 +1,132 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/headpose/contrib/fsanet.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace headpose { + +FSANet::FSANet(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool FSANet::Initialize() { + // parameters for preprocess + size = {64, 64}; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool FSANet::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // Resize + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + + // Normalize + std::vector alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f}; + std::vector beta = {-127.5f / 128.0f, -127.5f / 128.0f, + -127.5f / 128.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool FSANet::Postprocess( + FDTensor &infer_result, HeadPoseResult *result, + const std::map> &im_info) { + FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); + if (infer_result.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + auto iter_in = im_info.find("input_shape"); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); + int in_h = iter_in->second[0]; + int in_w = iter_in->second[1]; + + result->Clear(); + float *data = static_cast(infer_result.Data()); + for (size_t i = 0; i < 3; ++i) { + result->euler_angles.emplace_back(data[i]); + } + + return true; +} + +bool FSANet::Predict(cv::Mat *im, HeadPoseResult *result) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors[0], result, im_info)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace headpose +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.h b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.h new file mode 100755 index 0000000000..1cc2221c7a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet.h @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace headpose { +/*! 
@brief FSANet model object used when to load a FSANet model exported by + * FSANet. + */ +class ULTRAINFER_DECL FSANet : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./fsanet-var.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + FSANet(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "FSANet"; } + /** \brief Predict the face detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output face detection + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, HeadPoseResult *result); + + /// tuple of (width, height), default (64, 64) + std::vector size; + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(FDTensor &infer_result, HeadPoseResult *result, + const std::map> &im_info); +}; + +} // namespace headpose +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet_pybind.cc new file mode 100755 index 0000000000..f876af64d5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/headpose/contrib/fsanet_pybind.cc @@ -0,0 +1,31 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindFSANet(pybind11::module &m) { + pybind11::class_(m, "FSANet") + .def(pybind11::init()) + .def("predict", + [](vision::headpose::FSANet &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::HeadPoseResult res; + self.Predict(&mat, &res); + return res; + }) + .def_readwrite("size", &vision::headpose::FSANet::size); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/headpose/headpose_pybind.cc b/libs/ultrainfer/ultrainfer/vision/headpose/headpose_pybind.cc new file mode 100755 index 0000000000..e965a8de6e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/headpose/headpose_pybind.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
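A minimal C++ sketch of the FSANet API defined above (the model and image paths are hypothetical; only the members shown in the header are used):

#include <iostream>

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h"

int main() {
  // params_file defaults to "" and model_format to ONNX, as declared above.
  ultrainfer::vision::headpose::FSANet model("fsanet-var.onnx");
  cv::Mat im = cv::imread("face.jpg"); // hypothetical aligned face crop
  ultrainfer::vision::HeadPoseResult result;
  if (!model.Predict(&im, &result)) {
    std::cerr << "Head pose prediction failed." << std::endl;
    return -1;
  }
  // Postprocess copies three values into euler_angles (see fsanet.cc above).
  for (float angle : result.euler_angles) {
    std::cout << angle << " ";
  }
  std::cout << std::endl;
  return 0;
}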
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindFSANet(pybind11::module &m); + +void BindHeadPose(pybind11::module &m) { + auto headpose_module = m.def_submodule("headpose", "Headpose models."); + BindFSANet(headpose_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/keypointdet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/keypointdet/keypointdet_pybind.cc new file mode 100755 index 0000000000..0a3319c0c0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/keypointdet_pybind.cc @@ -0,0 +1,26 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPTinyPose(pybind11::module &m); + +void BindKeyPointDetection(pybind11::module &m) { + auto keypointdetection_module = m.def_submodule( + "keypointdetection", "Image object keypoint detection models."); + BindPPTinyPose(keypointdetection_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.cc b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.cc new file mode 100755 index 0000000000..fc2625b446 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.cc @@ -0,0 +1,283 @@ +#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose.h" + +#include "ultrainfer/vision/utils/utils.h" +#include "yaml-cpp/yaml.h" +#ifdef ENABLE_PADDLE2ONNX +#include "paddle2onnx/converter.h" +#endif +#include "ultrainfer/vision.h" + +namespace ultrainfer { +namespace vision { +namespace keypointdetection { + +PPTinyPose::PPTinyPose(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + config_file_ = config_file; + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool PPTinyPose::Initialize() { + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." 
+ << std::endl; + return false; + } + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + + return true; +} + +bool PPTinyPose::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + + std::string arch = cfg["arch"].as(); + if (arch != "HRNet" && arch != "HigherHRNet") { + FDERROR << "Require the arch of model is HRNet or HigherHRNet, but arch " + << "defined in " + << "config file is " << arch << "." << std::endl; + return false; + } + + processors_.push_back(std::make_shared()); + + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "NormalizeImage") { + if (!disable_normalize_) { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = op["is_scale"].as(); + processors_.push_back(std::make_shared(mean, std, is_scale)); + } + } else if (op_name == "Permute") { + if (!disable_permute_) { + // permute = cast + HWC2CHW + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + } + } else if (op_name == "TopDownEvalAffine") { + auto trainsize = op["trainsize"].as>(); + int height = trainsize[1]; + int width = trainsize[0]; + cv::Mat trans_matrix(2, 3, CV_64FC1); + processors_.push_back( + std::make_shared(trans_matrix, width, height, 1)); + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + return true; +} + +bool PPTinyPose::Preprocess(Mat *mat, std::vector *outputs) { + for (size_t i = 0; i < processors_.size(); ++i) { + if (processors_[i]->Name().compare("WarpAffine") == 0) { + auto processor = dynamic_cast(processors_[i].get()); + float origin_width = static_cast(mat->Width()); + float origin_height = static_cast(mat->Height()); + std::vector center = {origin_width / 2.0f, origin_height / 2.0f}; + std::vector scale = {origin_width, origin_height}; + int resize_width = -1; + int resize_height = -1; + std::tie(resize_width, resize_height) = processor->GetWidthAndHeight(); + cv::Mat trans_matrix(2, 3, CV_64FC1); + GetAffineTransform(center, scale, 0, {resize_width, resize_height}, + &trans_matrix, 0); + if (!(processor->SetTransformMatrix(trans_matrix))) { + FDERROR << "Failed to set transform matrix of " + << processors_[i]->Name() << " processor." << std::endl; + } + } + if (!(*(processors_[i].get()))(mat)) { + FDERROR << "Failed to process image data in " << processors_[i]->Name() + << "." 
<< std::endl; + return false; + } + } + + outputs->resize(1); + (*outputs)[0].name = InputInfoOfRuntime(0).name; + mat->ShareWithTensor(&((*outputs)[0])); + + // reshape to [1, c, h, w] + (*outputs)[0].ExpandDim(0); + + return true; +} + +bool PPTinyPose::Postprocess(std::vector &infer_result, + KeyPointDetectionResult *result, + const std::vector ¢er, + const std::vector &scale) { + FDASSERT(infer_result[0].shape[0] == 1, + "Only support batch = 1 in UltraInfer now."); + result->Clear(); + + if (infer_result.size() == 1) { + FDTensor result_copy = infer_result[0]; + result_copy.Reshape({result_copy.shape[0], result_copy.shape[1], + result_copy.shape[2] * result_copy.shape[3]}); + infer_result.resize(2); + function::ArgMax(result_copy, &infer_result[1], -1); + } + + // Calculate output length + int outdata_size = + std::accumulate(infer_result[0].shape.begin(), + infer_result[0].shape.end(), 1, std::multiplies()); + int idxdata_size = + std::accumulate(infer_result[1].shape.begin(), + infer_result[1].shape.end(), 1, std::multiplies()); + + if (outdata_size < 6) { + FDWARNING << "PPTinyPose No object detected." << std::endl; + } + float *out_data = static_cast(infer_result[0].Data()); + void *idx_data = infer_result[1].Data(); + int idx_dtype = infer_result[1].dtype; + std::vector out_data_shape(infer_result[0].shape.begin(), + infer_result[0].shape.end()); + std::vector idx_data_shape(infer_result[1].shape.begin(), + infer_result[1].shape.end()); + std::vector preds(out_data_shape[1] * 3, 0); + std::vector heatmap(out_data, out_data + outdata_size); + std::vector idxout(idxdata_size); + if (idx_dtype == FDDataType::INT32) { + std::copy(static_cast(idx_data), + static_cast(idx_data) + idxdata_size, idxout.begin()); + } else if (idx_dtype == FDDataType::INT64) { + std::copy(static_cast(idx_data), + static_cast(idx_data) + idxdata_size, idxout.begin()); + } else { + FDERROR << "Only support process inference result with INT32/INT64 data " + "type, but now it's " + << idx_dtype << "." << std::endl; + } + GetFinalPredictions(heatmap, out_data_shape, idxout, center, scale, &preds, + this->use_dark); + result->Reserve(outdata_size); + result->num_joints = out_data_shape[1]; + result->keypoints.clear(); + for (int i = 0; i < out_data_shape[1]; i++) { + result->keypoints.push_back({preds[i * 3 + 1], preds[i * 3 + 2]}); + result->scores.push_back(preds[i * 3]); + } + return true; +} + +bool PPTinyPose::Predict(cv::Mat *im, KeyPointDetectionResult *result) { + std::vector center = {round(im->cols / 2.0f), round(im->rows / 2.0f)}; + std::vector scale = {static_cast(im->cols), + static_cast(im->rows)}; + Mat mat(*im); + std::vector processed_data; + if (!Preprocess(&mat, &processed_data)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + + std::vector infer_result; + if (!Infer(processed_data, &infer_result)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + + if (!Postprocess(infer_result, result, center, scale)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." 
+ << std::endl; + return false; + } + + return true; +} + +bool PPTinyPose::Predict(cv::Mat *im, KeyPointDetectionResult *result, + const DetectionResult &detection_result) { + std::vector crop_imgs; + std::vector> center_bs; + std::vector> scale_bs; + int crop_imgs_num = 0; + int box_num = detection_result.boxes.size(); + for (int i = 0; i < box_num; i++) { + auto box = detection_result.boxes[i]; + auto label_id = detection_result.label_ids[i]; + int channel = im->channels(); + cv::Mat cv_crop_img(0, 0, CV_32SC(channel)); + Mat crop_img(cv_crop_img); + std::vector rect(box.begin(), box.end()); + std::vector center; + std::vector scale; + if (label_id == 0) { + Mat mat(*im); + utils::CropImageByBox(mat, &crop_img, rect, ¢er, &scale); + center_bs.emplace_back(center); + scale_bs.emplace_back(scale); + crop_imgs.emplace_back(crop_img); + crop_imgs_num += 1; + } + } + for (int i = 0; i < crop_imgs_num; i++) { + std::vector processed_data; + if (!Preprocess(&crop_imgs[i], &processed_data)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + std::vector infer_result; + if (!Infer(processed_data, &infer_result)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + KeyPointDetectionResult one_cropimg_result; + if (!Postprocess(infer_result, &one_cropimg_result, center_bs[i], + scale_bs[i])) { + FDERROR << "Failed to postprocess while using model:" << ModelName() + << "." << std::endl; + return false; + } + if (result->num_joints == -1) { + result->num_joints = one_cropimg_result.num_joints; + } + std::copy(one_cropimg_result.keypoints.begin(), + one_cropimg_result.keypoints.end(), + std::back_inserter(result->keypoints)); + std::copy(one_cropimg_result.scores.begin(), + one_cropimg_result.scores.end(), + std::back_inserter(result->scores)); + } + + return true; +} + +} // namespace keypointdetection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.h b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.h new file mode 100755 index 0000000000..2bd6f91c3a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose.h @@ -0,0 +1,116 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h" + +namespace ultrainfer { +namespace vision { +/** \brief All keypoint detection model APIs are defined inside this namespace + * + */ +namespace keypointdetection { + +/*! 
@brief PPTinyPose model object, used to load a PPTinyPose model exported
+ * by PaddleDetection
+ */
+class ULTRAINFER_DECL PPTinyPose : public UltraInferModel {
+public:
+  /** \brief Set path of model file and configuration file, and the
+   * configuration of runtime
+   *
+   * \param[in] model_file Path of model file, e.g pptinypose/model.pdmodel
+   * \param[in] params_file Path of parameter file, e.g
+   * pptinypose/model.pdiparams; if the model format is ONNX, this parameter
+   * will be ignored
+   * \param[in] config_file Path of configuration file for deployment, e.g
+   * pptinypose/infer_cfg.yml
+   * \param[in] custom_option RuntimeOption for inference, the default will use
+   * cpu, and choose the backend defined in `valid_cpu_backends`
+   * \param[in] model_format Model format of the loaded model, default is
+   * Paddle format
+   */
+  PPTinyPose(const std::string &model_file, const std::string &params_file,
+             const std::string &config_file,
+             const RuntimeOption &custom_option = RuntimeOption(),
+             const ModelFormat &model_format = ModelFormat::PADDLE);
+
+  /// Get model's name
+  std::string ModelName() const { return "PaddleDetection/PPTinyPose"; }
+
+  /** \brief Predict the keypoint detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread()
+   * \param[in] result The output keypoint detection result will be written to
+   * this structure
+   * \return true if the keypoint prediction succeeded, otherwise false
+   */
+  bool Predict(cv::Mat *im, KeyPointDetectionResult *result);
+
+  /** \brief Predict the keypoint detection result with a given detection
+   * result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread()
+   * \param[in] result The output keypoint detection result will be written to
+   * this structure
+   * \param[in] detection_result The structure that stores the pedestrian
+   * detection results, which are used to crop the image for multi-person
+   * keypoint detection
+   * \return true if the keypoint prediction succeeded, otherwise false
+   */
+  bool Predict(cv::Mat *im, KeyPointDetectionResult *result,
+               const DetectionResult &detection_result);
+
+  /** \brief Whether to use Distribution-Aware Coordinate Representation for
+   * Human Pose Estimation (DARK for short) in postprocess, default is true
+   */
+  bool use_dark = true;
+
+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() {
+    disable_normalize_ = true;
+    BuildPreprocessPipelineFromConfig();
+  }
+
+  /// This function will disable hwc2chw in preprocessing step.
+ void DisablePermute() { + disable_permute_ = true; + BuildPreprocessPipelineFromConfig(); + } + +protected: + bool Initialize(); + /// Build the preprocess pipeline from the loaded model + bool BuildPreprocessPipelineFromConfig(); + /// Preprocess an input image, and set the preprocessed results to `outputs` + bool Preprocess(Mat *mat, std::vector *outputs); + + /// Postprocess the inferenced results, and set the final result to `result` + bool Postprocess(std::vector &infer_result, + KeyPointDetectionResult *result, + const std::vector ¢er, + const std::vector &scale); + +private: + std::vector> processors_; + std::string config_file_; + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; +}; +} // namespace keypointdetection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_pybind.cc b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_pybind.cc new file mode 100755 index 0000000000..51891509fe --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_pybind.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPTinyPose(pybind11::module &m) { + pybind11::class_( + m, "PPTinyPose") + .def(pybind11::init()) + .def("predict", + [](vision::keypointdetection::PPTinyPose &self, + pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::KeyPointDetectionResult res; + self.Predict(&mat, &res); + return res; + }) + .def("predict", + [](vision::keypointdetection::PPTinyPose &self, + pybind11::array &data, + vision::DetectionResult &detection_result) { + auto mat = PyArrayToCvMat(data); + vision::KeyPointDetectionResult res; + self.Predict(&mat, &res, detection_result); + return res; + }) + .def("disable_normalize", + [](vision::keypointdetection::PPTinyPose &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::keypointdetection::PPTinyPose &self) { + self.DisablePermute(); + }) + .def_readwrite("use_dark", + &vision::keypointdetection::PPTinyPose::use_dark); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.cc b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.cc new file mode 100755 index 0000000000..50a708f4cf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
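A minimal single-person usage sketch of the PPTinyPose class above; for multi-person images the second Predict overload that takes a DetectionResult would be used instead. Paths are hypothetical:

#include <iostream>

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h"

int main() {
  // Hypothetical exported PaddleDetection model directory.
  ultrainfer::vision::keypointdetection::PPTinyPose model(
      "pptinypose/model.pdmodel", "pptinypose/model.pdiparams",
      "pptinypose/infer_cfg.yml");
  model.use_dark = true; // DARK decoding, on by default
  cv::Mat im = cv::imread("person.jpg");
  ultrainfer::vision::KeyPointDetectionResult result;
  if (!model.Predict(&im, &result)) {
    std::cerr << "Keypoint prediction failed." << std::endl;
    return -1;
  }
  // keypoints holds num_joints (x, y) pairs; scores the per-joint confidences.
  std::cout << "num_joints: " << result.num_joints << std::endl;
  return 0;
}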
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h" +#define PI 3.1415926535 +#define HALF_CIRCLE_DEGREE 180 + +namespace ultrainfer { +namespace vision { +namespace keypointdetection { + +cv::Point2f Get3dPoint(const cv::Point2f &a, const cv::Point2f &b) { + cv::Point2f direct{a.x - b.x, a.y - b.y}; + return cv::Point2f(a.x - direct.y, a.y + direct.x); +} + +std::vector GetDir(const float src_point_x, const float src_point_y, + const float rot_rad) { + float sn = sin(rot_rad); + float cs = cos(rot_rad); + std::vector src_result{0.0, 0.0}; + src_result[0] = src_point_x * cs - src_point_y * sn; + src_result[1] = src_point_x * sn + src_point_y * cs; + return src_result; +} + +void AffineTransform(const float pt_x, const float pt_y, const cv::Mat &trans, + std::vector *preds, const int p) { + double new1[3] = {pt_x, pt_y, 1.0}; + cv::Mat new_pt(3, 1, trans.type(), new1); + cv::Mat w = trans * new_pt; + (*preds)[p * 3 + 1] = static_cast(w.at(0, 0)); + (*preds)[p * 3 + 2] = static_cast(w.at(1, 0)); +} + +void GetAffineTransform(const std::vector ¢er, + const std::vector &scale, const float rot, + const std::vector &output_size, cv::Mat *trans, + const int inv) { + float src_w = scale[0]; + float dst_w = static_cast(output_size[0]); + float dst_h = static_cast(output_size[1]); + float rot_rad = rot * PI / HALF_CIRCLE_DEGREE; + std::vector src_dir = GetDir(-0.5 * src_w, 0, rot_rad); + std::vector dst_dir{-0.5f * dst_w, 0.0}; + cv::Point2f srcPoint2f[3], dstPoint2f[3]; + srcPoint2f[0] = cv::Point2f(center[0], center[1]); + srcPoint2f[1] = cv::Point2f(center[0] + src_dir[0], center[1] + src_dir[1]); + srcPoint2f[2] = Get3dPoint(srcPoint2f[0], srcPoint2f[1]); + + dstPoint2f[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5); + dstPoint2f[1] = + cv::Point2f(dst_w * 0.5 + dst_dir[0], dst_h * 0.5 + dst_dir[1]); + dstPoint2f[2] = Get3dPoint(dstPoint2f[0], dstPoint2f[1]); + if (inv == 0) { + (*trans) = cv::getAffineTransform(srcPoint2f, dstPoint2f); + } else { + (*trans) = cv::getAffineTransform(dstPoint2f, srcPoint2f); + } +} + +void TransformPreds(std::vector &coords, + const std::vector ¢er, + const std::vector &scale, + const std::vector &output_size, + const std::vector &dim, + std::vector *target_coords) { + cv::Mat trans(2, 3, CV_64FC1); + GetAffineTransform(center, scale, 0, output_size, &trans, 1); + for (int p = 0; p < dim[1]; ++p) { + AffineTransform(coords[p * 2], coords[p * 2 + 1], trans, target_coords, p); + } +} + +void GetFinalPredictions(const std::vector &heatmap, + const std::vector &dim, + const std::vector &idxout, + const std::vector ¢er, + const std::vector scale, + std::vector *preds, const bool DARK) { + std::vector coords(dim[1] * 2); + + int heatmap_height = dim[2]; + int heatmap_width = dim[3]; + for (int j = 0; j < dim[1]; ++j) { + int index = j * dim[2] * dim[3]; + int idx = idxout[j]; + (*preds)[j * 3] = heatmap[index + idx]; + coords[j * 2] = idx % heatmap_width; + coords[j * 2 + 1] = idx / heatmap_width; + int px = int(coords[j * 2] + 0.5); + int py = int(coords[j * 2 + 1] + 0.5); + if (DARK && px > 1 && px < heatmap_width - 2) { + 
utils::DarkParse(heatmap, dim, &coords, px, py, index, j); + } else { + if (px > 0 && px < heatmap_width - 1) { + float diff_x = heatmap[index + py * dim[3] + px + 1] - + heatmap[index + py * dim[3] + px - 1]; + coords[j * 2] += diff_x > 0 ? 1 : -1 * 0.25; + } + if (py > 0 && py < heatmap_height - 1) { + float diff_y = heatmap[index + (py + 1) * dim[3] + px] - + heatmap[index + (py - 1) * dim[3] + px]; + coords[j * 2 + 1] += diff_y > 0 ? 1 : -1 * 0.25; + } + } + } + std::vector img_size{heatmap_width, heatmap_height}; + TransformPreds(coords, center, scale, img_size, dim, preds); +} + +} // namespace keypointdetection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h new file mode 100755 index 0000000000..5db2da1517 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h @@ -0,0 +1,51 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace keypointdetection { + +cv::Point2f Get3dPoint(const cv::Point2f &a, const cv::Point2f &b); + +std::vector GetDir(const float src_point_x, const float src_point_y, + const float rot_rad); + +void GetAffineTransform(const std::vector ¢er, + const std::vector &scale, const float rot, + const std::vector &output_size, cv::Mat *trans, + const int inv); + +void AffineTransform(const float pt_x, const float pt_y, const cv::Mat &trans, + std::vector *preds, const int p); + +void TransformPreds(std::vector &coords, + const std::vector ¢er, + const std::vector &scale, + const std::vector &output_size, + const std::vector &dim, + std::vector *target_coords); + +void GetFinalPredictions(const std::vector &heatmap, + const std::vector &dim, + const std::vector &idxout, + const std::vector ¢er, + const std::vector scale, + std::vector *preds, const bool DARK); + +} // namespace keypointdetection +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.cc b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.cc new file mode 100755 index 0000000000..3ef17008ee --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.cc @@ -0,0 +1,155 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
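The affine helpers declared above are used in two directions: Preprocess builds the crop-to-network transform with inv == 0, and TransformPreds maps decoded heatmap coordinates back to the original crop with inv == 1. A small illustrative sketch with made-up crop geometry:

#include <vector>

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision/keypointdet/pptinypose/pptinypose_utils.h"

int main() {
  std::vector<float> center = {150.0f, 200.0f}; // crop center (x, y)
  std::vector<float> scale = {300.0f, 400.0f};  // crop width and height
  cv::Mat forward(2, 3, CV_64FC1);
  cv::Mat backward(2, 3, CV_64FC1);
  // inv == 0: original crop -> network input of size 196 x 256 (w x h).
  ultrainfer::vision::keypointdetection::GetAffineTransform(
      center, scale, 0.0f, {196, 256}, &forward, 0);
  // inv == 1: network/heatmap space -> original crop, used when decoding.
  ultrainfer::vision::keypointdetection::GetAffineTransform(
      center, scale, 0.0f, {196, 256}, &backward, 1);
  return 0;
}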
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/matting/contrib/modnet.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace matting { + +MODNet::MODNet(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool MODNet::Initialize() { + // parameters for preprocess + size = {256, 256}; + alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + beta = {-1.f, -1.f, -1.f}; // RGB + swap_rb = true; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool MODNet::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + // 1. Resize + // 2. BGR2RGB + // 3. Convert(opencv style) or Normalize + // 4. HWC2CHW + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (swap_rb) { + BGR2RGB::Run(mat); + } + + Convert::Run(mat, alpha, beta); + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w + return true; +} + +bool MODNet::Postprocess( + std::vector &infer_result, MattingResult *result, + const std::map> &im_info) { + FDASSERT((infer_result.size() == 1), + "The default number of output tensor must be 1 according to " + "modnet."); + FDTensor &alpha_tensor = infer_result.at(0); // (1, 1, h, w) + FDASSERT((alpha_tensor.shape[0] == 1), "Only support batch =1 now."); + if (alpha_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + + auto iter_ipt = im_info.find("input_shape"); + auto iter_out = im_info.find("output_shape"); + FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + int out_h = iter_out->second[0]; + int out_w = iter_out->second[1]; + int ipt_h = iter_ipt->second[0]; + int ipt_w = iter_ipt->second[1]; + + float *alpha_ptr = static_cast(alpha_tensor.Data()); + // cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr); + // Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy. + Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + alpha_ptr); // ref-only, zero copy. 
+ if ((out_h != ipt_h) || (out_w != ipt_w)) { + Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1); + } + + result->Clear(); + // note: must be setup shape before Resize + result->contain_foreground = false; + result->shape = {static_cast(ipt_h), static_cast(ipt_w)}; + int numel = ipt_h * ipt_w; + int nbytes = numel * sizeof(float); + result->Resize(numel); + std::memcpy(result->alpha.data(), alpha_resized.Data(), nbytes); + return true; +} + +bool MODNet::Predict(cv::Mat *im, MattingResult *result) { + Mat mat(*im); + std::vector input_tensors(1); + + std::map> im_info; + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace matting +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.h b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.h new file mode 100755 index 0000000000..c1008a1e92 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet.h @@ -0,0 +1,87 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { + +namespace vision { + +namespace matting { +/*! @brief MODNet model object used when to load a MODNet model exported by + * MODNet. + */ +class ULTRAINFER_DECL MODNet : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./modnet.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is ONNX format + */ + MODNet(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::ONNX); + + std::string ModelName() const { return "matting/MODNet"; } + + /*! 
@brief + Argument for image preprocessing step, tuple of (width, height), decide the + target size after resize, default (256, 256) + */ + std::vector size; + /*! @brief + Argument for image preprocessing step, parameters for normalization, size + should be the the same as channels, default alpha = {1.f / 127.5f, 1.f / + 127.5f, 1.f / 127.5f} + */ + std::vector alpha; + /*! @brief + Argument for image preprocessing step, parameters for normalization, size + should be the the same as channels, default beta = {-1.f, -1.f, -1.f} + */ + std::vector beta; + /*! @brief + Argument for image preprocessing step, whether to swap the B and R channel, + such as BGR->RGB, default true. + */ + bool swap_rb; + /** \brief Predict the matting result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output matting result + * will be writen to this structure \return true if the prediction successed, + * otherwise false + */ + bool Predict(cv::Mat *im, MattingResult *result); + +private: + bool Initialize(); + + bool Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, MattingResult *result, + const std::map> &im_info); +}; + +} // namespace matting +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet_pybind.cc b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet_pybind.cc new file mode 100755 index 0000000000..0432929c00 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/contrib/modnet_pybind.cc @@ -0,0 +1,36 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindMODNet(pybind11::module &m) { + // Bind MODNet + pybind11::class_(m, "MODNet") + .def(pybind11::init()) + .def("predict", + [](vision::matting::MODNet &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::MattingResult res; + self.Predict(&mat, &res); + return res; + }) + .def_readwrite("size", &vision::matting::MODNet::size) + .def_readwrite("alpha", &vision::matting::MODNet::alpha) + .def_readwrite("beta", &vision::matting::MODNet::beta) + .def_readwrite("swap_rb", &vision::matting::MODNet::swap_rb); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.cc b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.cc new file mode 100755 index 0000000000..d8c4ee4c79 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.cc @@ -0,0 +1,183 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
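A minimal C++ usage sketch of the MODNet class above (model and image paths are hypothetical; only members declared in the header are touched):

#include "opencv2/opencv.hpp"
#include "ultrainfer/vision.h"

int main() {
  ultrainfer::vision::matting::MODNet model("modnet.onnx"); // hypothetical path
  cv::Mat im = cv::imread("portrait.jpg");
  ultrainfer::vision::MattingResult result;
  if (!model.Predict(&im, &result)) {
    return -1;
  }
  // result.alpha is a float matte with result.shape == {h, w}, already resized
  // back to the input resolution by the postprocess step above.
  return 0;
}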
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/matting/contrib/rvm.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { + +namespace vision { + +namespace matting { + +RobustVideoMatting::RobustVideoMatting(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool RobustVideoMatting::Initialize() { + // parameters for preprocess + size = {1080, 1920}; + + video_mode = true; + + swap_rb = true; + + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool RobustVideoMatting::Preprocess( + Mat *mat, FDTensor *output, + std::map> *im_info) { + // Resize + int resize_w = size[0]; + int resize_h = size[1]; + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + // Convert_and_permute(swap_rb=true) + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, swap_rb); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, c, h, w + return true; +} + +bool RobustVideoMatting::Postprocess( + std::vector &infer_result, MattingResult *result, + const std::map> &im_info) { + FDASSERT((infer_result.size() == 6), + "The default number of output tensor must be 6 according to " + "RobustVideoMatting."); + FDTensor &fgr = infer_result.at(0); // fgr (1, 3, h, w) 0.~1. + FDTensor &alpha = infer_result.at(1); // alpha (1, 1, h, w) 0.~1. + FDASSERT((fgr.shape[0] == 1), "Only support batch = 1 now."); + FDASSERT((alpha.shape[0] == 1), "Only support batch = 1 now."); + if (fgr.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + if (alpha.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + // update context + if (video_mode) { + for (size_t i = 0; i < 4; ++i) { + FDTensor &rki = infer_result.at(i + 2); + dynamic_inputs_dims_[i] = rki.shape; + dynamic_inputs_datas_[i].resize(rki.Numel()); + memcpy(dynamic_inputs_datas_[i].data(), rki.Data(), + rki.Numel() * FDDataTypeSize(rki.dtype)); + } + } + + auto iter_in = im_info.find("input_shape"); + auto iter_out = im_info.find("output_shape"); + FDASSERT(iter_out != im_info.end() && iter_in != im_info.end(), + "Cannot find input_shape or output_shape from im_info."); + int out_h = iter_out->second[0]; + int out_w = iter_out->second[1]; + int in_h = iter_in->second[0]; + int in_w = iter_in->second[1]; + + // for alpha + float *alpha_ptr = static_cast(alpha.Data()); + Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + alpha_ptr); // ref-only, zero copy. + if ((out_h != in_h) || (out_w != in_w)) { + Resize::Run(&alpha_resized, in_w, in_h, -1, -1); + } + + // for foreground + float *fgr_ptr = static_cast(fgr.Data()); + Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + fgr_ptr); // ref-only, zero copy. + if ((out_h != in_h) || (out_w != in_w)) { + Resize::Run(&fgr_resized, in_w, in_h, -1, -1); + } + + result->contain_foreground = true; + // if contain_foreground == true, shape must set to (h, w, c) + result->shape = {static_cast(in_h), static_cast(in_w), 3}; + int numel = in_h * in_w; + int nbytes = numel * sizeof(float); + result->Resize(numel); + memcpy(result->alpha.data(), alpha_resized.Data(), nbytes); + memcpy(result->foreground.data(), fgr_resized.Data(), nbytes); + return true; +} + +bool RobustVideoMatting::Predict(cv::Mat *im, MattingResult *result) { + Mat mat(*im); + int inputs_nums = NumInputsOfRuntime(); + std::vector input_tensors(inputs_nums); + std::map> im_info; + // Record the shape of image and the shape of preprocessed image + im_info["input_shape"] = {mat.Height(), mat.Width()}; + im_info["output_shape"] = {mat.Height(), mat.Width()}; + // convert vector to FDTensor + for (size_t i = 1; i < inputs_nums; ++i) { + input_tensors[i].SetExternalData(dynamic_inputs_dims_[i - 1], + FDDataType::FP32, + dynamic_inputs_datas_[i - 1].data()); + input_tensors[i].device = Device::CPU; + } + if (!Preprocess(&mat, &input_tensors[0], &im_info)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + for (size_t i = 0; i < inputs_nums; ++i) { + input_tensors[i].name = InputInfoOfRuntime(i).name; + } + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result, im_info)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +} // namespace matting +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.h b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.h new file mode 100755 index 0000000000..ac9a2fde8b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm.h @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/ultrainfer_model.h"
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+
+namespace vision {
+/** \brief All image/video matting model APIs are defined inside this namespace
+ *
+ */
+namespace matting {
+
+/*! @brief RobustVideoMatting model object, used to load a RobustVideoMatting
+ * model exported by RobustVideoMatting
+ */
+class ULTRAINFER_DECL RobustVideoMatting : public UltraInferModel {
+public:
+  /** \brief Set path of model file and configuration file, and the
+   * configuration of runtime
+   *
+   * \param[in] model_file Path of model file, e.g rvm/rvm_mobilenetv3_fp32.onnx
+   * \param[in] params_file Path of parameter file; if the model format is
+   * ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference, the default will use
+   * cpu, and choose the backend defined in `valid_cpu_backends`
+   * \param[in] model_format Model format of the loaded model, default is ONNX
+   * format
+   */
+  RobustVideoMatting(const std::string &model_file,
+                     const std::string &params_file = "",
+                     const RuntimeOption &custom_option = RuntimeOption(),
+                     const ModelFormat &model_format = ModelFormat::ONNX);
+
+  /// Get model's name
+  std::string ModelName() const { return "matting/RobustVideoMatting"; }
+
+  /** \brief Predict the matting result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread()
+   * \param[in] result The output matting result will be written to this
+   * structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  bool Predict(cv::Mat *im, MattingResult *result);
+
+  /// Preprocess image size, the default is (1080, 1920)
+  std::vector<int> size;
+
+  /// Whether to enable video mode; if the inputs are unrelated still images,
+  /// set it to false, the default is true // NOLINT
+  bool video_mode;
+
+  /// Whether to convert to RGB. Set to false if you have converted YUV format
+  /// images to RGB outside the model, default true // NOLINT
+  bool swap_rb;
+
+private:
+  bool Initialize();
+  /// Preprocess an input image, and set the preprocessed results to `outputs`
+  bool Preprocess(Mat *mat, FDTensor *output,
+                  std::map<std::string, std::array<int, 2>> *im_info);
+
+  /// Postprocess the inferenced results, and set the final result to `result`
+  bool Postprocess(std::vector<FDTensor> &infer_result, MattingResult *result,
+                   const std::map<std::string, std::array<int, 2>> &im_info);
+
+  /// Initial data for the dynamic inputs
+  std::vector<std::vector<float>> dynamic_inputs_datas_ = {
+      {0.0f},  // r1i
+      {0.0f},  // r2i
+      {0.0f},  // r3i
+      {0.0f},  // r4i
+      {0.25f}, // downsample_ratio
+  };
+
+  /// Initial dims for the dynamic inputs
+  std::vector<std::vector<int64_t>> dynamic_inputs_dims_ = {
+      {1, 1, 1, 1}, // r1i
+      {1, 1, 1, 1}, // r2i
+      {1, 1, 1, 1}, // r3i
+      {1, 1, 1, 1}, // r4i
+      {1},          // downsample_ratio
+  };
+};
+
+} // namespace matting
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm_pybind.cc b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm_pybind.cc
new file mode 100755
index 0000000000..a54f7779cf
--- /dev/null
+++
b/libs/ultrainfer/ultrainfer/vision/matting/contrib/rvm_pybind.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindRobustVideoMatting(pybind11::module &m) { + // Bind RobustVideoMatting + pybind11::class_( + m, "RobustVideoMatting") + .def(pybind11::init()) + .def( + "predict", + [](vision::matting::RobustVideoMatting &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::MattingResult res; + self.Predict(&mat, &res); + return res; + }) + .def_readwrite("size", &vision::matting::RobustVideoMatting::size) + .def_readwrite("video_mode", + &vision::matting::RobustVideoMatting::video_mode) + .def_readwrite("swap_rb", &vision::matting::RobustVideoMatting::swap_rb); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/matting_pybind.cc b/libs/ultrainfer/ultrainfer/vision/matting/matting_pybind.cc new file mode 100755 index 0000000000..5986a7b594 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/matting_pybind.cc @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindMODNet(pybind11::module &m); +void BindRobustVideoMatting(pybind11::module &m); +void BindPPMatting(pybind11::module &m); + +void BindMatting(pybind11::module &m) { + auto matting_module = + m.def_submodule("matting", "Image/Video matting models."); + BindMODNet(matting_module); + BindRobustVideoMatting(matting_module); + BindPPMatting(matting_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.cc b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.cc new file mode 100755 index 0000000000..8a234a31be --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.cc @@ -0,0 +1,234 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
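A minimal C++ usage sketch for the RobustVideoMatting class bound above; the include path follows this patch's layout, while the model and image file names are placeholders to substitute with local files.

    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/matting/contrib/rvm.h"

    int main() {
      namespace matting = ultrainfer::vision::matting;
      // Default RuntimeOption runs on CPU with a backend from valid_cpu_backends.
      matting::RobustVideoMatting model("rvm_mobilenetv3_fp32.onnx");
      // For independent images (no temporal context), disable video mode so the
      // recurrent states r1i..r4i are not carried across calls.
      model.video_mode = false;
      cv::Mat frame = cv::imread("input.jpg");
      ultrainfer::vision::MattingResult result;
      if (!model.Predict(&frame, &result)) {
        std::cerr << "RobustVideoMatting prediction failed." << std::endl;
        return 1;
      }
      // result.alpha holds one float per pixel; result.foreground is filled as
      // well, since Postprocess() sets contain_foreground = true.
      std::cout << "alpha elements: " << result.alpha.size() << std::endl;
      return 0;
    }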
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/matting/ppmatting/ppmatting.h" + +#include "ultrainfer/vision/utils/utils.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace matting { + +PPMatting::PPMatting(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + config_file_ = config_file; + valid_cpu_backends = {Backend::ORT, Backend::PDINFER, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool PPMatting::Initialize() { + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + return false; + } + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool PPMatting::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + processors_.push_back(std::make_shared()); + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + + FDASSERT((cfg["Deploy"]["input_shape"]), + "The yaml file should include input_shape parameters"); + // input_shape + // b c h w + auto input_shape = cfg["Deploy"]["input_shape"].as>(); + FDASSERT(input_shape.size() == 4, + "The input_shape in yaml file need to be 4-dimensions, but now its " + "dimension is %zu.", + input_shape.size()); + + is_fixed_input_shape_ = false; + if (input_shape[2] > 0 && input_shape[3] > 0) { + is_fixed_input_shape_ = true; + } + if (input_shape[2] < 0 || input_shape[3] < 0) { + FDWARNING << "Detected dynamic input shape of your model, only Paddle " + "Inference / OpenVINO support this model now." + << std::endl; + } + if (cfg["Deploy"]["transforms"]) { + auto preprocess_cfg = cfg["Deploy"]["transforms"]; + int long_size = -1; + for (const auto &op : preprocess_cfg) { + FDASSERT(op.IsMap(), + "Require the transform information in yaml be Map type."); + if (op["type"].as() == "LimitShort") { + int max_short = op["max_short"] ? op["max_short"].as() : -1; + int min_short = op["min_short"] ? 
op["min_short"].as() : -1; + if (is_fixed_input_shape_) { + // if the input shape is fixed, will resize by scale, and the max + // shape will not exceed input_shape + long_size = max_short; + std::vector max_size = {input_shape[2], input_shape[3]}; + processors_.push_back( + std::make_shared(long_size, 1, true, max_size)); + } else { + processors_.push_back( + std::make_shared(max_short, min_short)); + } + } else if (op["type"].as() == "ResizeToIntMult") { + if (is_fixed_input_shape_) { + std::vector max_size = {input_shape[2], input_shape[3]}; + processors_.push_back( + std::make_shared(long_size, 1, true, max_size)); + } else { + int mult_int = op["mult_int"] ? op["mult_int"].as() : 32; + processors_.push_back(std::make_shared(mult_int)); + } + } else if (op["type"].as() == "Normalize") { + std::vector mean = {0.5, 0.5, 0.5}; + std::vector std = {0.5, 0.5, 0.5}; + if (op["mean"]) { + mean = op["mean"].as>(); + } + if (op["std"]) { + std = op["std"].as>(); + } + processors_.push_back(std::make_shared(mean, std)); + } else if (op["type"].as() == "ResizeByShort") { + long_size = op["short_size"].as(); + if (is_fixed_input_shape_) { + std::vector max_size = {input_shape[2], input_shape[3]}; + processors_.push_back( + std::make_shared(long_size, 1, true, max_size)); + } else { + processors_.push_back(std::make_shared(long_size)); + } + } + } + // the default padding value is {127.5,127.5,127.5} so after normalizing, + // ((127.5/255)-0.5)/0.5 = 0.0 + std::vector value = {0.0, 0.0, 0.0}; + processors_.push_back(std::make_shared("float")); + processors_.push_back( + std::make_shared(input_shape[3], input_shape[2], value)); + processors_.push_back(std::make_shared()); + } + + return true; +} + +bool PPMatting::Preprocess(Mat *mat, FDTensor *output, + std::map> *im_info) { + (*im_info)["input_shape"] = {mat->Height(), mat->Width()}; + for (size_t i = 0; i < processors_.size(); ++i) { + if (!(*(processors_[i].get()))(mat)) { + FDERROR << "Failed to process image data in " << processors_[i]->Name() + << "." << std::endl; + return false; + } + } + (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); + output->name = InputInfoOfRuntime(0).name; + return true; +} + +bool PPMatting::Postprocess( + std::vector &infer_result, MattingResult *result, + const std::map> &im_info) { + FDASSERT((infer_result.size() == 1), + "The default number of output tensor must be 1 "); + FDTensor &alpha_tensor = infer_result.at(0); // (1, 1, h, w) + FDASSERT((alpha_tensor.shape[0] == 1), "Only support batch = 1 now."); + if (alpha_tensor.dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + std::vector dim{0, 2, 3, 1}; + function::Transpose(alpha_tensor, &alpha_tensor, dim); + alpha_tensor.Squeeze(0); + Mat mat = Mat::Create(alpha_tensor); + + auto iter_ipt = im_info.find("input_shape"); + auto iter_out = im_info.find("output_shape"); + if (is_fixed_input_shape_) { + double scale_h = static_cast(iter_out->second[0]) / + static_cast(iter_ipt->second[0]); + double scale_w = static_cast(iter_out->second[1]) / + static_cast(iter_ipt->second[1]); + double actual_scale = std::min(scale_h, scale_w); + + int size_before_pad_h = round(actual_scale * iter_ipt->second[0]); + int size_before_pad_w = round(actual_scale * iter_ipt->second[1]); + + Crop::Run(&mat, 0, 0, size_before_pad_w, size_before_pad_h); + } + + Resize::Run(&mat, iter_ipt->second[1], iter_ipt->second[0], -1.0f, -1.0f, 1, + false, ProcLib::OPENCV); + + result->Clear(); + // note: must be setup shape before Resize + result->contain_foreground = false; + result->shape = {iter_ipt->second[0], iter_ipt->second[1]}; + int numel = iter_ipt->second[0] * iter_ipt->second[1]; + int nbytes = numel * sizeof(float); + result->Resize(numel); + std::memcpy(result->alpha.data(), mat.Data(), nbytes); + return true; +} + +bool PPMatting::Predict(cv::Mat *im, MattingResult *result) { + Mat mat(*im); + std::vector processed_data(1); + + std::map> im_info; + + if (!Preprocess(&mat, &(processed_data[0]), &im_info)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + std::vector infer_result(1); + if (!Infer(processed_data, &infer_result)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + if (!Postprocess(infer_result, result, im_info)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +} // namespace matting +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.h b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.h new file mode 100755 index 0000000000..ebd45a8248 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting.h @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +/** \brief All object matting model APIs are defined inside this namespace + * + */ +namespace matting { +/*! @brief PPMatting model object used when to load a PPMatting model exported + * by PPMatting. 
+ */ +class ULTRAINFER_DECL PPMatting : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g PPMatting-512/model.pdmodel + * \param[in] params_file Path of parameter file, e.g + * PPMatting-512/model.pdiparams, if the model format is ONNX, this parameter + * will be ignored \param[in] config_file Path of configuration file for + * deployment, e.g PPMatting-512/infer_cfg.yml \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends` \param[in] model_format Model + * format of the loaded model, default is Paddle format + */ + PPMatting(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "PaddleMatting"; } + /** \brief Predict the matting result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output matting result + * will be writen to this structure \return true if the prediction successed, + * otherwise false + */ + virtual bool Predict(cv::Mat *im, MattingResult *result); + +private: + bool Initialize(); + + bool BuildPreprocessPipelineFromConfig(); + + bool Preprocess(Mat *mat, FDTensor *outputs, + std::map> *im_info); + + bool Postprocess(std::vector &infer_result, MattingResult *result, + const std::map> &im_info); + + std::vector> processors_; + std::string config_file_; + bool is_fixed_input_shape_; +}; + +} // namespace matting +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting_pybind.cc b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting_pybind.cc new file mode 100755 index 0000000000..6a41147230 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/matting/ppmatting/ppmatting_pybind.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPMatting(pybind11::module &m) { + pybind11::class_(m, "PPMatting") + .def(pybind11::init()) + .def("predict", + [](vision::matting::PPMatting &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::MattingResult res; + self.Predict(&mat, &res); + return res; + }); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ocr_pybind.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ocr_pybind.cc new file mode 100755 index 0000000000..1636646a54 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ocr_pybind.cc @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
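A minimal C++ sketch of calling the PPMatting class declared above; the PPMatting-512 paths come from the doc comment and stand in for any locally exported Paddle model.

    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/matting/ppmatting/ppmatting.h"

    int main() {
      namespace matting = ultrainfer::vision::matting;
      // Paddle-format model: model file, params file and the deploy config
      // are all required by the constructor.
      matting::PPMatting model("PPMatting-512/model.pdmodel",
                               "PPMatting-512/model.pdiparams",
                               "PPMatting-512/infer_cfg.yml");
      cv::Mat image = cv::imread("portrait.jpg");
      ultrainfer::vision::MattingResult result;
      if (!model.Predict(&image, &result)) {
        std::cerr << "PPMatting prediction failed." << std::endl;
        return 1;
      }
      // PPMatting only produces an alpha matte (contain_foreground == false),
      // shaped to the original input height and width.
      std::cout << "alpha shape: " << result.shape[0] << " x " << result.shape[1]
                << std::endl;
      return 0;
    }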
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPOCRModel(pybind11::module &m); +void BindPPOCRv4(pybind11::module &m); +void BindPPOCRv3(pybind11::module &m); +void BindPPOCRv2(pybind11::module &m); +void BindPPStructureV2Table(pybind11::module &m); + +void BindOcr(pybind11::module &m) { + auto ocr_module = m.def_submodule("ocr", "Module to deploy OCR models"); + BindPPOCRModel(ocr_module); + BindPPOCRv4(ocr_module); + BindPPOCRv3(ocr_module); + BindPPOCRv2(ocr_module); + BindPPStructureV2Table(ocr_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.cc new file mode 100755 index 0000000000..649aa330bd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.cc @@ -0,0 +1,128 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/classifier.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +Classifier::Classifier() {} +Classifier::Classifier(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool Classifier::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + + return true; +} + +std::unique_ptr Classifier::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(Classifier(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool Classifier::Predict(const cv::Mat &img, int32_t *cls_label, + float *cls_score) { + std::vector cls_labels(1); + std::vector cls_scores(1); + bool success = BatchPredict({img}, &cls_labels, &cls_scores); + if (!success) { + return success; + } + *cls_label = cls_labels[0]; + *cls_score = cls_scores[0]; + return true; +} + +bool Classifier::Predict(const cv::Mat &img, vision::OCRResult *ocr_result) { + ocr_result->cls_labels.resize(1); + ocr_result->cls_scores.resize(1); + if (!Predict(img, &(ocr_result->cls_labels[0]), + &(ocr_result->cls_scores[0]))) { + return false; + } + return true; +} + +bool Classifier::BatchPredict(const std::vector &images, + vision::OCRResult *ocr_result) { + return BatchPredict(images, &(ocr_result->cls_labels), + &(ocr_result->cls_scores)); +} + +bool Classifier::BatchPredict(const std::vector &images, + std::vector *cls_labels, + std::vector *cls_scores) { + return BatchPredict(images, cls_labels, cls_scores, 0, images.size()); +} + +bool Classifier::BatchPredict(const std::vector &images, + std::vector *cls_labels, + std::vector *cls_scores, + size_t start_index, size_t end_index) { + size_t total_size = images.size(); + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, + end_index)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores, + start_index, total_size)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.h new file mode 100755 index 0000000000..d54e3dc378 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/classifier.h @@ -0,0 +1,123 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
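The single-image Predict overload defined above simply wraps BatchPredict over one element; a minimal call sequence might look like the following sketch (model and image paths are placeholders).

    #include <cstdint>
    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/ocr/ppocr/classifier.h"

    int main() {
      namespace ocr = ultrainfer::vision::ocr;
      ocr::Classifier cls("ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel",
                          "ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams");
      cv::Mat crop = cv::imread("text_line.jpg");
      int32_t label = 0;
      float score = 0.0f;
      if (!cls.Predict(crop, &label, &score)) {
        std::cerr << "Classification failed." << std::endl;
        return 1;
      }
      // Label 1 with a score above the postprocessor threshold (default 0.9)
      // is the usual signal that the text line should be rotated 180 degrees.
      std::cout << "label: " << label << ", score: " << score << std::endl;
      return 0;
    }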
+ +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/cls_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/cls_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { +/*! @brief Classifier object is used to load the classification model provided + * by PaddleOCR. + */ +class ULTRAINFER_DECL Classifier : public UltraInferModel { +public: + Classifier(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel. \param[in] params_file Path + * of parameter file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams, if + * the model format is ONNX, this parameter will be ignored. \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in `valid_cpu_backends`. \param[in] model_format + * Model format of the loaded model, default is Paddle format. + */ + Classifier(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new Classifier with less memory usage when multiple + * instances of the same model are created + * + * \return new Classifier* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "ppocr/ocr_cls"; } + + /** \brief Predict the input image and get OCR classification model + * cls_result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] cls_label The label result of + * cls model will be written in to this param. \param[in] cls_score The score + * result of cls model will be written in to this param. \return true if the + * prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, int32_t *cls_label, + float *cls_score); + + /** \brief Predict the input image and get OCR recognition model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * recognition model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR classification model + * result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * classification model result will be writen to this structure. \return true + * if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR classification model + * cls_result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. 
\param[in] cls_labels The label + * results of cls model will be written in to this vector. \param[in] + * cls_scores The score results of cls model will be written in to this + * vector. \return true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *cls_labels, + std::vector *cls_scores); + virtual bool BatchPredict(const std::vector &images, + std::vector *cls_labels, + std::vector *cls_scores, size_t start_index, + size_t end_index); + + /// Get preprocessor reference of ClassifierPreprocessor + virtual ClassifierPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of ClassifierPostprocessor + virtual ClassifierPostprocessor &GetPostprocessor() { return postprocessor_; } + +private: + bool Initialize(); + ClassifierPreprocessor preprocessor_; + ClassifierPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.cc new file mode 100755 index 0000000000..9f50d4b6b5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/cls_postprocessor.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +bool SingleBatchPostprocessor(const float *out_data, const size_t &length, + int *cls_label, float *cls_score) { + + *cls_label = std::distance(&out_data[0], + std::max_element(&out_data[0], &out_data[length])); + + *cls_score = float(*std::max_element(&out_data[0], &out_data[length])); + return true; +} + +bool ClassifierPostprocessor::Run(const std::vector &tensors, + std::vector *cls_labels, + std::vector *cls_scores) { + size_t total_size = tensors[0].shape[0]; + return Run(tensors, cls_labels, cls_scores, 0, total_size); +} + +bool ClassifierPostprocessor::Run(const std::vector &tensors, + std::vector *cls_labels, + std::vector *cls_scores, + size_t start_index, size_t total_size) { + // Classifier have only 1 output tensor. + const FDTensor &tensor = tensors[0]; + + // For Classifier, the output tensor shape = [batch,2] + size_t batch = tensor.shape[0]; + size_t length = accumulate(tensor.shape.begin() + 1, tensor.shape.end(), 1, + std::multiplies()); + + if (batch <= 0) { + FDERROR << "The infer outputTensor.shape[0] <=0, wrong infer result." + << std::endl; + return false; + } + if (start_index < 0 || total_size <= 0) { + FDERROR << "start_index or total_size error. Correct is: 0 <= start_index " + "< total_size" + << std::endl; + return false; + } + if ((start_index + batch) > total_size) { + FDERROR << "start_index or total_size error. 
Correct is: start_index + " + "batch(outputTensor.shape[0]) <= total_size" + << std::endl; + return false; + } + + cls_labels->resize(total_size); + cls_scores->resize(total_size); + const float *tensor_data = reinterpret_cast(tensor.Data()); + for (int i_batch = 0; i_batch < batch; ++i_batch) { + SingleBatchPostprocessor(tensor_data + i_batch * length, length, + &cls_labels->at(i_batch + start_index), + &cls_scores->at(i_batch + start_index)); + } + + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.h new file mode 100755 index 0000000000..6991ad04d7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_postprocessor.h @@ -0,0 +1,54 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Postprocessor object for Classifier serials model. + */ +class ULTRAINFER_DECL ClassifierPostprocessor { +public: + /** \brief Process the result of runtime and fill to ClassifyResult structure + * + * \param[in] tensors The inference result from runtime + * \param[in] cls_labels The output label results of classification model + * \param[in] cls_scores The output score results of classification model + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *cls_labels, std::vector *cls_scores); + + bool Run(const std::vector &tensors, + std::vector *cls_labels, std::vector *cls_scores, + size_t start_index, size_t total_size); + + /// Set threshold for the classification postprocess, default is 0.9 + void SetClsThresh(float cls_thresh) { cls_thresh_ = cls_thresh; } + + /// Get threshold value of the classification postprocess. + float GetClsThresh() const { return cls_thresh_; } + +private: + float cls_thresh_ = 0.9; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.cc new file mode 100755 index 0000000000..078249629f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.cc @@ -0,0 +1,102 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
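Stripped of the tensor plumbing, SingleBatchPostprocessor reduces each row of the [batch, 2] output to an argmax label plus its score; the per-row logic can be checked in isolation with toy values.

    #include <algorithm>
    #include <iostream>
    #include <iterator>

    int main() {
      // One row of classifier output: scores for the 0- and 180-degree classes.
      const float out_data[2] = {0.12f, 0.88f};
      const size_t length = 2;
      // Argmax index becomes the label, the maximum value becomes the score.
      int cls_label = static_cast<int>(std::distance(
          out_data, std::max_element(out_data, out_data + length)));
      float cls_score = *std::max_element(out_data, out_data + length);
      std::cout << "label: " << cls_label << ", score: " << cls_score
                << std::endl;  // label: 1, score: 0.88
      return 0;
    }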
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/cls_preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +ClassifierPreprocessor::ClassifierPreprocessor() { + resize_op_ = std::make_shared(-1, -1); + + std::vector value = {0, 0, 0}; + pad_op_ = std::make_shared(0, 0, 0, 0, value); + + normalize_op_ = + std::make_shared(std::vector({0.5f, 0.5f, 0.5f}), + std::vector({0.5f, 0.5f, 0.5f}), true); + hwc2chw_op_ = std::make_shared(); +} + +void ClassifierPreprocessor::OcrClassifierResizeImage( + FDMat *mat, const std::vector &cls_image_shape) { + int img_c = cls_image_shape[0]; + int img_h = cls_image_shape[1]; + int img_w = cls_image_shape[2]; + + float ratio = float(mat->Width()) / float(mat->Height()); + + int resize_w; + if (ceilf(img_h * ratio) > img_w) + resize_w = img_w; + else + resize_w = int(ceilf(img_h * ratio)); + + resize_op_->SetWidthAndHeight(resize_w, img_h); + (*resize_op_)(mat); +} + +bool ClassifierPreprocessor::Run(std::vector *images, + std::vector *outputs, + size_t start_index, size_t end_index) { + if (images->size() == 0 || start_index < 0 || end_index <= start_index || + end_index > images->size()) { + FDERROR << "images->size() or index error. Correct is: 0 <= start_index < " + "end_index <= images->size()" + << std::endl; + return false; + } + + std::vector mats(end_index - start_index); + for (size_t i = start_index; i < end_index; ++i) { + mats[i - start_index] = images->at(i); + } + return Run(&mats, outputs); +} + +bool ClassifierPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + OcrClassifierResizeImage(mat, cls_image_shape_); + if (!disable_normalize_) { + (*normalize_op_)(mat); + } + std::vector value = {0, 0, 0}; + if (mat->Width() < cls_image_shape_[2]) { + pad_op_->SetPaddingSize(0, 0, 0, cls_image_shape_[2] - mat->Width()); + (*pad_op_)(mat); + } + if (!disable_permute_) { + (*hwc2chw_op_)(mat); + } + } + // Only have 1 output tensor. + outputs->resize(1); + // Get the NCHW tensor + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.h new file mode 100755 index 0000000000..f24468db8c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/cls_preprocessor.h @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
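OcrClassifierResizeImage above keeps the source aspect ratio while capping the resized width at cls_image_shape[2]; narrower results are right-padded in Apply(). The width selection can be verified in isolation, assuming the default {3, 48, 192} shape.

    #include <cmath>
    #include <iostream>

    // Mirrors the width selection in OcrClassifierResizeImage for a target
    // cls_image_shape of {3, 48, 192}.
    int ClsResizeWidth(int src_w, int src_h, int img_h = 48, int img_w = 192) {
      float ratio = static_cast<float>(src_w) / static_cast<float>(src_h);
      if (std::ceil(img_h * ratio) > img_w) return img_w;
      return static_cast<int>(std::ceil(img_h * ratio));
    }

    int main() {
      std::cout << ClsResizeWidth(320, 32) << std::endl;  // 192 (width capped)
      std::cout << ClsResizeWidth(100, 32) << std::endl;  // 150 (padded to 192 later)
      return 0;
    }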
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Preprocessor object for Classifier serials model. + */ +class ULTRAINFER_DECL ClassifierPreprocessor : public ProcessorManager { +public: + ClassifierPreprocessor(); + using ProcessorManager::Run; + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input data list, all the elements are FDMat + * \param[in] outputs The output tensors which will be fed into runtime + * \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + size_t start_index, size_t end_index); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Set preprocess normalize parameters, please call this API to customize + /// the normalize parameters, otherwise it will use the default normalize + /// parameters. + void SetNormalize(const std::vector &mean, + const std::vector &std, bool is_scale) { + normalize_op_ = std::make_shared(mean, std, is_scale); + } + + /// Set cls_image_shape for the classification preprocess + void SetClsImageShape(const std::vector &cls_image_shape) { + cls_image_shape_ = cls_image_shape; + } + /// Get cls_image_shape for the classification preprocess + std::vector GetClsImageShape() const { return cls_image_shape_; } + + /// This function will disable normalize in preprocessing step. + void DisableNormalize() { disable_permute_ = true; } + /// This function will disable hwc2chw in preprocessing step. + void DisablePermute() { disable_normalize_ = true; } + +private: + void OcrClassifierResizeImage(FDMat *mat, + const std::vector &cls_image_shape); + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + std::vector cls_image_shape_ = {3, 48, 192}; + + std::shared_ptr resize_op_; + std::shared_ptr pad_op_; + std::shared_ptr normalize_op_; + std::shared_ptr hwc2chw_op_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.cc new file mode 100755 index 0000000000..5d6598a5fa --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.cc @@ -0,0 +1,124 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/dbcurvedetector.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +DBCURVEDetector::DBCURVEDetector() {} +DBCURVEDetector::DBCURVEDetector(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +// Init +bool DBCURVEDetector::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +std::unique_ptr DBCURVEDetector::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(DBCURVEDetector(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool DBCURVEDetector::Predict(const cv::Mat &img, + std::vector> *boxes_result) { + std::vector>> det_results; + if (!BatchPredict({img}, &det_results)) { + return false; + } + *boxes_result = std::move(det_results[0]); + return true; +} + +bool DBCURVEDetector::Predict(const cv::Mat &img, + vision::OCRCURVEResult *ocr_result) { + if (!Predict(img, &(ocr_result->boxes))) { + return false; + } + return true; +} + +bool DBCURVEDetector::BatchPredict( + const std::vector &images, + std::vector *ocr_results) { + std::vector>> det_results; + if (!BatchPredict(images, &det_results)) { + return false; + } + ocr_results->resize(det_results.size()); + for (int i = 0; i < det_results.size(); i++) { + (*ocr_results)[i].boxes = std::move(det_results[i]); + } + return true; +} + +bool DBCURVEDetector::BatchPredict( + const std::vector &images, + std::vector>> *det_results) { + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + auto batch_det_img_info = preprocessor_.GetBatchImgInfo(); + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." 
<< std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, det_results, + *batch_det_img_info)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.h new file mode 100755 index 0000000000..999430eb8e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbcurvedetector.h @@ -0,0 +1,118 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h" +#include "ultrainfer/vision/ocr/ppocr/det_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { + +/*! @brief DBCURVEDetector object is used to load the detection model provided + * by PaddleOCR. + */ +class ULTRAINFER_DECL DBCURVEDetector : public UltraInferModel { +public: + DBCURVEDetector(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./ch_PP-OCRv3_det_infer/model.pdmodel. \param[in] params_file Path of + * parameter file, e.g ./ch_PP-OCRv3_det_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends`. \param[in] model_format Model + * format of the loaded model, default is Paddle format. + */ + DBCURVEDetector(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new DBCURVEDetector with less memory usage when multiple + * instances of the same model are created + * + * \return new DBCURVEDetector* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "ppocr/ocr_det"; } + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] boxes_result The output of + * OCR detection model result will be writen to this structure. \return true + * if the prediction is successed, otherwise false. 
+ */ + virtual bool Predict(const cv::Mat &img, + std::vector> *boxes_result); + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * detection model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, vision::OCRCURVEResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] det_results The output + * of OCR detection model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool + BatchPredict(const std::vector &images, + std::vector>> *det_results); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] ocr_results The output + * of OCR detection model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *ocr_results); + + /// Get preprocessor reference of DBCURVEDetectorPreprocessor + virtual DBDetectorPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of DBCURVEDetectorPostprocessor + virtual DBCURVEDetectorPostprocessor &GetPostprocessor() { + return postprocessor_; + } + +private: + bool Initialize(); + DBDetectorPreprocessor preprocessor_; + DBCURVEDetectorPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.cc new file mode 100755 index 0000000000..3082c55942 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
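A minimal C++ sketch of running the DBCURVEDetector declared above on a single image via the OCRCURVEResult overload; the model and image paths are placeholders and the default RuntimeOption keeps inference on CPU.

    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/ocr/ppocr/dbcurvedetector.h"

    int main() {
      namespace ocr = ultrainfer::vision::ocr;
      ocr::DBCURVEDetector det("det_infer/model.pdmodel",
                               "det_infer/model.pdiparams");
      cv::Mat image = cv::imread("doc.jpg");
      ultrainfer::vision::OCRCURVEResult result;
      if (!det.Predict(image, &result)) {
        std::cerr << "Curved text detection failed." << std::endl;
        return 1;
      }
      // result.boxes holds one polygon per detected curved text region.
      std::cout << "detected regions: " << result.boxes.size() << std::endl;
      return 0;
    }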
+ +#include "ultrainfer/vision/ocr/ppocr/dbdetector.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +DBDetector::DBDetector() {} +DBDetector::DBDetector(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +// Init +bool DBDetector::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +std::unique_ptr DBDetector::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(DBDetector(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool DBDetector::Predict(const cv::Mat &img, + std::vector> *boxes_result) { + std::vector>> det_results; + if (!BatchPredict({img}, &det_results)) { + return false; + } + *boxes_result = std::move(det_results[0]); + return true; +} + +bool DBDetector::Predict(const cv::Mat &img, vision::OCRResult *ocr_result) { + if (!Predict(img, &(ocr_result->boxes))) { + return false; + } + return true; +} + +bool DBDetector::BatchPredict(const std::vector &images, + std::vector *ocr_results) { + std::vector>> det_results; + if (!BatchPredict(images, &det_results)) { + return false; + } + ocr_results->resize(det_results.size()); + for (int i = 0; i < det_results.size(); i++) { + (*ocr_results)[i].boxes = std::move(det_results[i]); + } + return true; +} + +bool DBDetector::BatchPredict( + const std::vector &images, + std::vector>> *det_results) { + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + auto batch_det_img_info = preprocessor_.GetBatchImgInfo(); + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, det_results, + *batch_det_img_info)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.h new file mode 100755 index 0000000000..8f69f8f717 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/dbdetector.h @@ -0,0 +1,115 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/det_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/det_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { + +/*! @brief DBDetector object is used to load the detection model provided by + * PaddleOCR. + */ +class ULTRAINFER_DECL DBDetector : public UltraInferModel { +public: + DBDetector(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./ch_PP-OCRv3_det_infer/model.pdmodel. \param[in] params_file Path of + * parameter file, e.g ./ch_PP-OCRv3_det_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends`. \param[in] model_format Model + * format of the loaded model, default is Paddle format. + */ + DBDetector(const std::string &model_file, const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new DBDetector with less memory usage when multiple + * instances of the same model are created + * + * \return new DBDetector* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "ppocr/ocr_det"; } + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] boxes_result The output of + * OCR detection model result will be writen to this structure. \return true + * if the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, + std::vector> *boxes_result); + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * detection model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] det_results The output + * of OCR detection model result will be writen to this structure. 
\return + * true if the prediction is successed, otherwise false. + */ + virtual bool + BatchPredict(const std::vector &images, + std::vector>> *det_results); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] ocr_results The output + * of OCR detection model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *ocr_results); + + /// Get preprocessor reference of DBDetectorPreprocessor + virtual DBDetectorPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of DBDetectorPostprocessor + virtual DBDetectorPostprocessor &GetPostprocessor() { return postprocessor_; } + +private: + bool Initialize(); + DBDetectorPreprocessor preprocessor_; + DBDetectorPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.cc new file mode 100755 index 0000000000..18a87b8172 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.cc @@ -0,0 +1,98 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
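DBDetector follows the same call pattern, and its postprocess thresholds can be adjusted through GetPostprocessor() before prediction; a sketch with placeholder paths:

    #include <iostream>
    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/ocr/ppocr/dbdetector.h"

    int main() {
      namespace ocr = ultrainfer::vision::ocr;
      ocr::DBDetector det("ch_PP-OCRv3_det_infer/model.pdmodel",
                          "ch_PP-OCRv3_det_infer/model.pdiparams");
      // Loosen the box score threshold a little (the default is 0.6).
      det.GetPostprocessor().SetDetDBBoxThresh(0.5);
      cv::Mat image = cv::imread("doc.jpg");
      ultrainfer::vision::OCRResult result;
      if (!det.Predict(image, &result)) {
        std::cerr << "Text detection failed." << std::endl;
        return 1;
      }
      // Each detected box is 8 ints: four (x, y) corner points.
      std::cout << "detected boxes: " << result.boxes.size() << std::endl;
      return 0;
    }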
+ +#include "ultrainfer/vision/ocr/ppocr/det_postprocessor.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +bool DBDetectorPostprocessor::SingleBatchPostprocessor( + const float *out_data, int n2, int n3, + const std::array &det_img_info, + std::vector> *boxes_result) { + int n = n2 * n3; + + // prepare bitmap + std::vector pred(n, 0.0); + std::vector cbuf(n, ' '); + + for (int i = 0; i < n; i++) { + pred[i] = float(out_data[i]); + cbuf[i] = (unsigned char)((out_data[i]) * 255); + } + cv::Mat cbuf_map(n2, n3, CV_8UC1, (unsigned char *)cbuf.data()); + cv::Mat pred_map(n2, n3, CV_32F, (float *)pred.data()); + + const double threshold = det_db_thresh_ * 255; + const double maxvalue = 255; + cv::Mat bit_map; + cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY); + if (use_dilation_) { + cv::Mat dila_ele = + cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); + cv::dilate(bit_map, bit_map, dila_ele); + } + + std::vector>> boxes; + + boxes = util_post_processor_.BoxesFromBitmap( + pred_map, bit_map, det_db_box_thresh_, det_db_unclip_ratio_, + det_db_score_mode_); + + boxes = util_post_processor_.FilterTagDetRes(boxes, det_img_info); + + // boxes to boxes_result + for (int i = 0; i < boxes.size(); i++) { + std::array new_box; + int k = 0; + for (auto &vec : boxes[i]) { + for (auto &e : vec) { + new_box[k++] = e; + } + } + boxes_result->emplace_back(new_box); + } + + return true; +} + +bool DBDetectorPostprocessor::Run( + const std::vector &tensors, + std::vector>> *results, + const std::vector> &batch_det_img_info) { + // DBDetector have only 1 output tensor. + const FDTensor &tensor = tensors[0]; + + // For DBDetector, the output tensor shape = [batch, 1, ?, ?] + size_t batch = tensor.shape[0]; + size_t length = accumulate(tensor.shape.begin() + 1, tensor.shape.end(), 1, + std::multiplies()); + const float *tensor_data = reinterpret_cast(tensor.Data()); + + results->resize(batch); + for (int i_batch = 0; i_batch < batch; ++i_batch) { + SingleBatchPostprocessor(tensor_data, tensor.shape[2], tensor.shape[3], + batch_det_img_info[i_batch], + &results->at(i_batch)); + tensor_data = tensor_data + length; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.h new file mode 100755 index 0000000000..6583a51bad --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor.h @@ -0,0 +1,84 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! 
@brief Postprocessor object for the DBDetector series of models.
+ */
+class ULTRAINFER_DECL DBDetectorPostprocessor {
+public:
+  /** \brief Process the result of runtime and fill the results structure
+   *
+   * \param[in] tensors The inference result from runtime
+   * \param[in] results The output result of the detector
+   * \param[in] batch_det_img_info The detector preprocess result
+   * \return true if the postprocess succeeded, otherwise false
+   */
+  bool Run(const std::vector &tensors,
+           std::vector>> *results,
+           const std::vector> &batch_det_img_info);
+
+  /// Set det_db_thresh for the detection postprocess, default is 0.3
+  void SetDetDBThresh(double det_db_thresh) { det_db_thresh_ = det_db_thresh; }
+  /// Get det_db_thresh of the detection postprocess
+  double GetDetDBThresh() const { return det_db_thresh_; }
+
+  /// Set det_db_box_thresh for the detection postprocess, default is 0.6
+  void SetDetDBBoxThresh(double det_db_box_thresh) {
+    det_db_box_thresh_ = det_db_box_thresh;
+  }
+  /// Get det_db_box_thresh of the detection postprocess
+  double GetDetDBBoxThresh() const { return det_db_box_thresh_; }
+
+  /// Set det_db_unclip_ratio for the detection postprocess, default is 1.5
+  void SetDetDBUnclipRatio(double det_db_unclip_ratio) {
+    det_db_unclip_ratio_ = det_db_unclip_ratio;
+  }
+  /// Get det_db_unclip_ratio of the detection postprocess
+  double GetDetDBUnclipRatio() const { return det_db_unclip_ratio_; }
+
+  /// Set det_db_score_mode for the detection postprocess, default is 'slow'
+  void SetDetDBScoreMode(const std::string &det_db_score_mode) {
+    det_db_score_mode_ = det_db_score_mode;
+  }
+  /// Get det_db_score_mode of the detection postprocess
+  std::string GetDetDBScoreMode() const { return det_db_score_mode_; }
+
+  /// Set use_dilation for the detection postprocess, default is false
+  void SetUseDilation(int use_dilation) { use_dilation_ = use_dilation; }
+  /// Get use_dilation of the detection postprocess
+  int GetUseDilation() const { return use_dilation_; }
+
+private:
+  double det_db_thresh_ = 0.3;
+  double det_db_box_thresh_ = 0.6;
+  double det_db_unclip_ratio_ = 1.5;
+  std::string det_db_score_mode_ = "slow";
+  bool use_dilation_ = false;
+  PostProcessor util_post_processor_;
+  bool SingleBatchPostprocessor(const float *out_data, int n2, int n3,
+                                const std::array &det_img_info,
+                                std::vector> *boxes_result);
+};
+
+} // namespace ocr
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.cc
new file mode 100755
index 0000000000..ab06c59a08
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.cc
@@ -0,0 +1,103 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
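Editor's note: before moving on to the curved-text variant below, here is a small hedged sketch of how the DB decode knobs declared above are typically tuned through a DBDetector's postprocessor. It only uses setters declared in this patch; the concrete values are examples, not recommendations.

#include "ultrainfer/vision/ocr/ppocr/dbdetector.h"

// Tuning sketch for the DB postprocess (values are illustrative).
void ConfigureDetector(ultrainfer::vision::ocr::DBDetector *detector) {
  auto &post = detector->GetPostprocessor();
  // Pixel threshold used to binarize the probability map (see det_postprocessor.cc).
  post.SetDetDBThresh(0.3);
  // Score threshold passed to BoxesFromBitmap to drop low-scoring candidate boxes.
  post.SetDetDBBoxThresh(0.5);
  // Unclip ratio controls how much each kept region is expanded before cropping.
  post.SetDetDBUnclipRatio(1.6);
  // "slow" or "fast" scoring mode, forwarded to the box extraction step.
  post.SetDetDBScoreMode("fast");
  // Dilate the binarized map before extracting boxes (off by default).
  post.SetUseDilation(1);
}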
+ +#include "ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +bool DBCURVEDetectorPostprocessor::SingleBatchPostprocessor( + const float *out_data, int n2, int n3, + const std::array &det_img_info, + std::vector> *boxes_result) { + int n = n2 * n3; + + // prepare bitmap + std::vector pred(n, 0.0); + std::vector cbuf(n, ' '); + + for (int i = 0; i < n; i++) { + pred[i] = float(out_data[i]); + cbuf[i] = (unsigned char)((out_data[i]) * 255); + } + cv::Mat cbuf_map(n2, n3, CV_8UC1, (unsigned char *)cbuf.data()); + cv::Mat pred_map(n2, n3, CV_32F, (float *)pred.data()); + + const double threshold = det_db_thresh_ * 255; + const double maxvalue = 255; + cv::Mat bit_map; + cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY); + if (use_dilation_) { + cv::Mat dila_ele = + cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); + cv::dilate(bit_map, bit_map, dila_ele); + } + + std::vector>> boxes; + + if (det_db_box_type_ == "bbox") { + boxes = util_post_processor_.BoxesFromBitmap( + pred_map, bit_map, det_db_box_thresh_, det_db_unclip_ratio_, + det_db_score_mode_); + boxes = util_post_processor_.FilterTagDetRes(boxes, det_img_info); + } else { + boxes = util_post_processor_.PolygonFromBitmap( + pred_map, bit_map, det_db_box_thresh_, det_db_unclip_ratio_, + det_db_score_mode_); + boxes = util_post_processor_.FilterCURVETagDetRes(boxes, det_img_info); + } + + // boxes to boxes_result + for (int i = 0; i < boxes.size(); i++) { + std::vector new_box; + for (auto &vec : boxes[i]) { + for (auto &e : vec) { + new_box.push_back(e); + } + } + boxes_result->emplace_back(new_box); + } + + return true; +} + +bool DBCURVEDetectorPostprocessor::Run( + const std::vector &tensors, + std::vector>> *results, + const std::vector> &batch_det_img_info) { + // DBCURVEDetector have only 1 output tensor. + const FDTensor &tensor = tensors[0]; + + // For DBCURVEDetector, the output tensor shape = [batch, 1, ?, ?] + size_t batch = tensor.shape[0]; + size_t length = accumulate(tensor.shape.begin() + 1, tensor.shape.end(), 1, + std::multiplies()); + const float *tensor_data = reinterpret_cast(tensor.Data()); + + results->resize(batch); + for (int i_batch = 0; i_batch < batch; ++i_batch) { + SingleBatchPostprocessor(tensor_data, tensor.shape[2], tensor.shape[3], + batch_det_img_info[i_batch], + &results->at(i_batch)); + tensor_data = tensor_data + length; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h new file mode 100755 index 0000000000..bd4e29b471 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_postprocessor_curve.h @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Postprocessor object for DBCURVEDetector serials model. + */ +class ULTRAINFER_DECL DBCURVEDetectorPostprocessor { +public: + /** \brief Process the result of runtime and fill to results structure + * + * \param[in] tensors The inference result from runtime + * \param[in] results The output result of detector + * \param[in] batch_det_img_info The detector_preprocess result + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector>> *results, + const std::vector> &batch_det_img_info); + + /// Set det_db_thresh for the detection postprocess, default is 0.3 + void SetDetDBThresh(double det_db_thresh) { det_db_thresh_ = det_db_thresh; } + /// Get det_db_thresh of the detection postprocess + double GetDetDBThresh() const { return det_db_thresh_; } + + /// Set det_db_box_thresh for the detection postprocess, default is 0.6 + void SetDetDBBoxThresh(double det_db_box_thresh) { + det_db_box_thresh_ = det_db_box_thresh; + } + /// Get det_db_box_thresh of the detection postprocess + double GetDetDBBoxThresh() const { return det_db_box_thresh_; } + + /// Set det_db_unclip_ratio for the detection postprocess, default is 1.5 + void SetDetDBUnclipRatio(double det_db_unclip_ratio) { + det_db_unclip_ratio_ = det_db_unclip_ratio; + } + /// Get det_db_unclip_ratio_ of the detection postprocess + double GetDetDBUnclipRatio() const { return det_db_unclip_ratio_; } + + void SetDetDBScoreMode(const std::string &det_db_score_mode) { + det_db_score_mode_ = det_db_score_mode; + } + + void SetDetDBBoxType(const std::string &det_db_box_type) { + det_db_box_type_ = det_db_box_type; + } + std::string GetDetDBScoreMode() const { return det_db_score_mode_; } + + std::string GetDetDBBoxType() const { return det_db_box_type_; } + + /// Set use_dilation for the detection postprocess, default is fasle + void SetUseDilation(int use_dilation) { use_dilation_ = use_dilation; } + /// Get use_dilation of the detection postprocess + int GetUseDilation() const { return use_dilation_; } + +private: + double det_db_thresh_ = 0.3; + double det_db_box_thresh_ = 0.6; + double det_db_unclip_ratio_ = 1.5; + std::string det_db_box_type_ = "bbox"; + std::string det_db_score_mode_ = "slow"; + bool use_dilation_ = false; + PostProcessor util_post_processor_; + bool SingleBatchPostprocessor(const float *out_data, int n2, int n3, + const std::array &det_img_info, + std::vector> *boxes_result); +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.cc new file mode 100755 index 0000000000..e41c2eff18 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/det_preprocessor.h" + +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +std::array +DBDetectorPreprocessor::OcrDetectorGetInfo(FDMat *img, int max_size_len) { + int w = img->Width(); + int h = img->Height(); + if (static_shape_infer_) { + return {w, h, det_image_shape_[2], det_image_shape_[1]}; + } + + float ratio = 1.f; + int max_wh = w >= h ? w : h; + if (max_wh > max_size_len) { + if (h > w) { + ratio = float(max_size_len) / float(h); + } else { + ratio = float(max_size_len) / float(w); + } + } + int resize_h = int(float(h) * ratio); + int resize_w = int(float(w) * ratio); + resize_h = std::max(int(std::round(float(resize_h) / 32) * 32), 32); + resize_w = std::max(int(std::round(float(resize_w) / 32) * 32), 32); + + return {w, h, resize_w, resize_h}; + /* + *ratio_h = float(resize_h) / float(h); + *ratio_w = float(resize_w) / float(w); + */ +} + +DBDetectorPreprocessor::DBDetectorPreprocessor() { + resize_op_ = std::make_shared(-1, -1); + + std::vector value = {0, 0, 0}; + pad_op_ = std::make_shared(0, 0, 0, 0, value); + + normalize_permute_op_ = std::make_shared( + std::vector({0.485f, 0.456f, 0.406f}), + std::vector({0.229f, 0.224f, 0.225f}), true); +} + +bool DBDetectorPreprocessor::ResizeImage(FDMat *img, int resize_w, int resize_h, + int max_resize_w, int max_resize_h) { + resize_op_->SetWidthAndHeight(resize_w, resize_h); + (*resize_op_)(img); + + pad_op_->SetPaddingSize(0, max_resize_h - resize_h, 0, + max_resize_w - resize_w); + (*pad_op_)(img); + return true; +} + +bool DBDetectorPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + int max_resize_w = 0; + int max_resize_h = 0; + batch_det_img_info_.clear(); + batch_det_img_info_.resize(image_batch->mats->size()); + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + batch_det_img_info_[i] = OcrDetectorGetInfo(mat, max_side_len_); + max_resize_w = std::max(max_resize_w, batch_det_img_info_[i][2]); + max_resize_h = std::max(max_resize_h, batch_det_img_info_[i][3]); + } + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + ResizeImage(mat, batch_det_img_info_[i][2], batch_det_img_info_[i][3], + max_resize_w, max_resize_h); + } + + if (!disable_normalize_ && !disable_permute_) { + (*normalize_permute_op_)(image_batch); + } + + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.h new file mode 100755 index 0000000000..f2b419232b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/det_preprocessor.h @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/vision/common/processors/manager.h"
+#include "ultrainfer/vision/common/processors/normalize_and_permute.h"
+#include "ultrainfer/vision/common/processors/pad.h"
+#include "ultrainfer/vision/common/processors/resize.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+namespace vision {
+
+namespace ocr {
+/*! @brief Preprocessor object for the DBDetector series of models.
+ */
+class ULTRAINFER_DECL DBDetectorPreprocessor : public ProcessorManager {
+public:
+  DBDetectorPreprocessor();
+
+  /** \brief Process the input image and prepare input tensors for runtime
+   *
+   * \param[in] image_batch The input image batch
+   * \param[in] outputs The output tensors which will be fed into runtime
+   * \return true if the preprocess succeeded, otherwise false
+   */
+  virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs);
+
+  /// Set max_side_len for the detection preprocess, default is 960
+  void SetMaxSideLen(int max_side_len) { max_side_len_ = max_side_len; }
+
+  /// Get max_side_len of the detection preprocess
+  int GetMaxSideLen() const { return max_side_len_; }
+
+  /// Set preprocess normalize parameters, please call this API to customize
+  /// the normalize parameters, otherwise it will use the default normalize
+  /// parameters.
+  void SetNormalize(const std::vector &mean,
+                    const std::vector &std, bool is_scale) {
+    normalize_permute_op_ =
+        std::make_shared(mean, std, is_scale);
+  }
+
+  /// Get the image info of the last batch, return a list of array
+  /// {image width, image height, resize width, resize height}
+  const std::vector> *GetBatchImgInfo() {
+    return &batch_det_img_info_;
+  }
+
+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+  /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
+  /// Set det_image_shape for the detection preprocess.
+  /// This API is usually used when you retrain the model.
+  /// Generally, you do not need to use it.
+  void SetDetImageShape(const std::vector &det_image_shape) {
+    det_image_shape_ = det_image_shape;
+  }
+  /// Get det_image_shape of the detection preprocess
+  std::vector GetDetImageShape() const { return det_image_shape_; }
+
+  /// Set whether static_shape_infer is true or not. When deploying PP-OCR
+  /// on hardware which cannot support dynamic input shapes very well,
+  /// like Huawei Ascend, static_shape_infer needs to be true.
+ void SetStaticShapeInfer(bool static_shape_infer) { + static_shape_infer_ = static_shape_infer; + } + /// Get static_shape_infer of the recognition preprocess + bool GetStaticShapeInfer() const { return static_shape_infer_; } + +private: + bool ResizeImage(FDMat *img, int resize_w, int resize_h, int max_resize_w, + int max_resize_h); + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + int max_side_len_ = 960; + std::vector> batch_det_img_info_; + std::shared_ptr resize_op_; + std::shared_ptr pad_op_; + std::shared_ptr normalize_permute_op_; + std::vector det_image_shape_ = {3, 960, 960}; + bool static_shape_infer_ = false; + std::array OcrDetectorGetInfo(FDMat *img, int max_size_len); +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ocrmodel_pybind.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ocrmodel_pybind.cc new file mode 100755 index 0000000000..11cac3366f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ocrmodel_pybind.cc @@ -0,0 +1,748 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
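Editor's note: before the Python bindings that follow, here is a small self-contained sketch of the resize rule used by DBDetectorPreprocessor::OcrDetectorGetInfo above (cap the longest side at max_side_len, then round both sides to multiples of 32). It mirrors only the dynamic-shape branch; the sample dimensions are arbitrary.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdio>

// Returns {orig_w, orig_h, resize_w, resize_h}, following the same arithmetic
// as the dynamic-shape branch of OcrDetectorGetInfo.
std::array<int, 4> GetDetResizeInfo(int w, int h, int max_size_len) {
  float ratio = 1.f;
  int max_wh = std::max(w, h);
  if (max_wh > max_size_len) {
    ratio = static_cast<float>(max_size_len) / static_cast<float>(max_wh);
  }
  int resize_h = static_cast<int>(h * ratio);
  int resize_w = static_cast<int>(w * ratio);
  // Round to the nearest multiple of 32, with a floor of 32.
  resize_h = std::max(static_cast<int>(std::round(resize_h / 32.f)) * 32, 32);
  resize_w = std::max(static_cast<int>(std::round(resize_w / 32.f)) * 32, 32);
  return {w, h, resize_w, resize_h};
}

int main() {
  // A 2000x1000 page with the default max_side_len of 960 is scaled by
  // 960/2000 = 0.48 to 960x480; both sides are already multiples of 32.
  std::array<int, 4> info = GetDetResizeInfo(2000, 1000, 960);
  std::printf("resize to %dx%d\n", info[2], info[3]);  // prints "resize to 960x480"
  return 0;
}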
+#include + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPOCRModel(pybind11::module &m) { + m.def("sort_boxes", [](std::vector> &boxes) { + vision::ocr::SortBoxes(&boxes); + return boxes; + }); + + // UVDoc + pybind11::class_( + m, "UVDocPreprocessor") + .def(pybind11::init<>()) + .def("set_normalize", + [](vision::ocr::UVDocPreprocessor &self, + const std::vector &mean, const std::vector &std, + bool is_scale) { self.SetNormalize(mean, std, is_scale); }) + .def("run", + [](vision::ocr::UVDocPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "UVDocPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def( + "disable_normalize", + [](vision::ocr::UVDocPreprocessor &self) { self.DisableNormalize(); }) + .def("disable_permute", + [](vision::ocr::UVDocPreprocessor &self) { self.DisablePermute(); }); + + pybind11::class_(m, "UVDocPostprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::ocr::UVDocPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error("Failed to preprocess the input data in " + "UVDocPostprocessor."); + } + for (size_t i = 0; i < results.size(); ++i) { + results[i].StopSharing(); + } + return results; + }); + + pybind11::class_(m, + "UVDocWarpper") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::UVDocWarpper::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::UVDocWarpper::GetPostprocessor) + .def("clone", + [](vision::ocr::UVDocWarpper &self) { return self.Clone(); }) + .def("predict", + [](vision::ocr::UVDocWarpper &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + FDTensor res; + self.Predict(mat, &res); + res.StopSharing(); + return res; + }) + .def("batch_predict", [](vision::ocr::UVDocWarpper &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + for (size_t i = 0; i < results.size(); ++i) { + results[i].StopSharing(); + } + return results; + // std::vector results; + // self.BatchPredict(images, &results); + // std::vector> ret; + // for(size_t i = 0; i < results.size(); ++i){ + // ret.push_back(pybind11::array_t( + // {results[i].rows, results[i].cols, results[i].channels()}, + // results[i].data)); + // } + // return ret; + }); + + // DBDetector + pybind11::class_(m, "DBDetectorPreprocessor") + .def(pybind11::init<>()) + .def_property("static_shape_infer", + &vision::ocr::DBDetectorPreprocessor::GetStaticShapeInfer, + &vision::ocr::DBDetectorPreprocessor::SetStaticShapeInfer) + .def_property("max_side_len", + &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen, + &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen) + .def("set_normalize", + [](vision::ocr::DBDetectorPreprocessor &self, + const std::vector &mean, const std::vector &std, + bool is_scale) { self.SetNormalize(mean, std, is_scale); }) + .def("run", + [](vision::ocr::DBDetectorPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + 
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + self.Run(&images, &outputs); + auto batch_det_img_info = self.GetBatchImgInfo(); + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return std::make_pair(outputs, *batch_det_img_info); + }) + .def("disable_normalize", + [](vision::ocr::DBDetectorPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::DBDetectorPreprocessor &self) { + self.DisablePermute(); + }); + + pybind11::class_( + m, "DBDetectorPostprocessor") + .def(pybind11::init<>()) + .def_property("det_db_thresh", + &vision::ocr::DBDetectorPostprocessor::GetDetDBThresh, + &vision::ocr::DBDetectorPostprocessor::SetDetDBThresh) + .def_property("det_db_box_thresh", + &vision::ocr::DBDetectorPostprocessor::GetDetDBBoxThresh, + &vision::ocr::DBDetectorPostprocessor::SetDetDBBoxThresh) + .def_property("det_db_unclip_ratio", + &vision::ocr::DBDetectorPostprocessor::GetDetDBUnclipRatio, + &vision::ocr::DBDetectorPostprocessor::SetDetDBUnclipRatio) + .def_property("det_db_score_mode", + &vision::ocr::DBDetectorPostprocessor::GetDetDBScoreMode, + &vision::ocr::DBDetectorPostprocessor::SetDetDBScoreMode) + .def_property("use_dilation", + &vision::ocr::DBDetectorPostprocessor::GetUseDilation, + &vision::ocr::DBDetectorPostprocessor::SetUseDilation) + + .def("run", + [](vision::ocr::DBDetectorPostprocessor &self, + std::vector &inputs, + const std::vector> &batch_det_img_info) { + std::vector>> results; + + if (!self.Run(inputs, &results, batch_det_img_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "DBDetectorPostprocessor."); + } + return results; + }) + .def( + "run", [](vision::ocr::DBDetectorPostprocessor &self, + std::vector &input_array, + const std::vector> &batch_det_img_info) { + std::vector>> results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, batch_det_img_info)) { + throw std::runtime_error("Failed to preprocess the input data in " + "DBDetectorPostprocessor."); + } + return results; + }); + + pybind11::class_( + m, "DBCURVEDetectorPostprocessor") + .def(pybind11::init<>()) + .def_property("det_db_thresh", + &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBThresh, + &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBThresh) + .def_property( + "det_db_box_thresh", + &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBBoxThresh, + &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBBoxThresh) + .def_property( + "det_db_unclip_ratio", + &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBUnclipRatio, + &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBUnclipRatio) + .def_property( + "det_db_score_mode", + &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBScoreMode, + &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBScoreMode) + .def_property("det_db_box_type", + &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBBoxType, + &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBBoxType) + .def_property("use_dilation", + &vision::ocr::DBCURVEDetectorPostprocessor::GetUseDilation, + &vision::ocr::DBCURVEDetectorPostprocessor::SetUseDilation) + + .def("run", + [](vision::ocr::DBCURVEDetectorPostprocessor &self, + std::vector &inputs, + const std::vector> &batch_det_img_info) { + std::vector>> results; + + if (!self.Run(inputs, &results, batch_det_img_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + 
"DBCURVEDetectorPostprocessor."); + } + return results; + }) + .def( + "run", [](vision::ocr::DBCURVEDetectorPostprocessor &self, + std::vector &input_array, + const std::vector> &batch_det_img_info) { + std::vector>> results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, batch_det_img_info)) { + throw std::runtime_error("Failed to preprocess the input data in " + "DBCURVEDetectorPostprocessor."); + } + return results; + }); + + pybind11::class_(m, "DBDetector") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::DBDetector::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::DBDetector::GetPostprocessor) + .def("predict", + [](vision::ocr::DBDetector &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; + }) + .def("batch_predict", [](vision::ocr::DBDetector &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector ocr_results; + self.BatchPredict(images, &ocr_results); + return ocr_results; + }); + + pybind11::class_( + m, "DBCURVEDetector") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::DBCURVEDetector::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::DBCURVEDetector::GetPostprocessor) + .def("predict", + [](vision::ocr::DBCURVEDetector &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRCURVEResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; + }) + .def("batch_predict", [](vision::ocr::DBCURVEDetector &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector ocr_results; + self.BatchPredict(images, &ocr_results); + return ocr_results; + }); + + // Classifier + pybind11::class_(m, "ClassifierPreprocessor") + .def(pybind11::init<>()) + .def_property("cls_image_shape", + &vision::ocr::ClassifierPreprocessor::GetClsImageShape, + &vision::ocr::ClassifierPreprocessor::SetClsImageShape) + .def("set_normalize", + [](vision::ocr::ClassifierPreprocessor &self, + const std::vector &mean, const std::vector &std, + bool is_scale) { self.SetNormalize(mean, std, is_scale); }) + .def("run", + [](vision::ocr::ClassifierPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "ClassifierPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def("disable_normalize", + [](vision::ocr::ClassifierPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::ClassifierPreprocessor &self) { + self.DisablePermute(); + }); + + pybind11::class_( + m, "ClassifierPostprocessor") + .def(pybind11::init<>()) + .def_property("cls_thresh", + &vision::ocr::ClassifierPostprocessor::GetClsThresh, + &vision::ocr::ClassifierPostprocessor::SetClsThresh) + .def("run", + [](vision::ocr::ClassifierPostprocessor &self, + std::vector &inputs) { + std::vector cls_labels; + 
std::vector cls_scores; + if (!self.Run(inputs, &cls_labels, &cls_scores)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "ClassifierPostprocessor."); + } + return std::make_pair(cls_labels, cls_scores); + }) + .def("run", [](vision::ocr::ClassifierPostprocessor &self, + std::vector &input_array) { + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + std::vector cls_labels; + std::vector cls_scores; + if (!self.Run(inputs, &cls_labels, &cls_scores)) { + throw std::runtime_error("Failed to preprocess the input data in " + "ClassifierPostprocessor."); + } + return std::make_pair(cls_labels, cls_scores); + }); + + pybind11::class_(m, "Classifier") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::Classifier::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::Classifier::GetPostprocessor) + .def("predict", + [](vision::ocr::Classifier &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; + }) + .def("batch_predict", [](vision::ocr::Classifier &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + vision::OCRResult ocr_result; + self.BatchPredict(images, &ocr_result); + return ocr_result; + }); + + // Recognizer + pybind11::class_(m, "RecognizerPreprocessor") + .def(pybind11::init<>()) + .def_property("static_shape_infer", + &vision::ocr::RecognizerPreprocessor::GetStaticShapeInfer, + &vision::ocr::RecognizerPreprocessor::SetStaticShapeInfer) + .def_property("rec_image_shape", + &vision::ocr::RecognizerPreprocessor::GetRecImageShape, + &vision::ocr::RecognizerPreprocessor::SetRecImageShape) + .def("set_normalize", + [](vision::ocr::RecognizerPreprocessor &self, + const std::vector &mean, const std::vector &std, + bool is_scale) { self.SetNormalize(mean, std, is_scale); }) + .def("run", + [](vision::ocr::RecognizerPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "RecognizerPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def("disable_normalize", + [](vision::ocr::RecognizerPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::RecognizerPreprocessor &self) { + self.DisablePermute(); + }); + + pybind11::class_( + m, "RecognizerPostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::ocr::RecognizerPostprocessor &self, + std::vector &inputs) { + std::vector texts; + std::vector rec_scores; + if (!self.Run(inputs, &texts, &rec_scores)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "RecognizerPostprocessor."); + } + return std::make_pair(texts, rec_scores); + }) + .def("run", [](vision::ocr::RecognizerPostprocessor &self, + std::vector &input_array) { + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + std::vector texts; + std::vector rec_scores; + if (!self.Run(inputs, &texts, &rec_scores)) { + throw std::runtime_error("Failed to preprocess the input data in " + "RecognizerPostprocessor."); + } + 
return std::make_pair(texts, rec_scores); + }); + + pybind11::class_(m, "Recognizer") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::Recognizer::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::Recognizer::GetPostprocessor) + .def("clone", [](vision::ocr::Recognizer &self) { return self.Clone(); }) + .def("predict", + [](vision::ocr::Recognizer &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; + }) + .def("batch_predict", [](vision::ocr::Recognizer &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + vision::OCRResult ocr_result; + self.BatchPredict(images, &ocr_result); + return ocr_result; + }); + + // Table + pybind11::class_(m, "StructureV2TablePreprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::ocr::StructureV2TablePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error("Failed to preprocess the input data in " + "StructureV2TablePreprocessor."); + } + + auto batch_det_img_info = self.GetBatchImgInfo(); + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + + return std::make_pair(outputs, *batch_det_img_info); + }); + + pybind11::class_( + m, "StructureV2TablePostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::ocr::StructureV2TablePostprocessor &self, + std::vector &inputs, + const std::vector> &batch_det_img_info) { + std::vector>> boxes; + std::vector> structure_list; + + if (!self.Run(inputs, &boxes, &structure_list, + batch_det_img_info)) { + throw std::runtime_error( + "Failed to postprocess the input data in " + "StructureV2TablePostprocessor."); + } + return std::make_pair(boxes, structure_list); + }) + .def("run", + [](vision::ocr::StructureV2TablePostprocessor &self, + std::vector &input_array, + const std::vector> &batch_det_img_info) { + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + std::vector>> boxes; + std::vector> structure_list; + + if (!self.Run(inputs, &boxes, &structure_list, + batch_det_img_info)) { + throw std::runtime_error( + "Failed to postprocess the input data in " + "StructureV2TablePostprocessor."); + } + return std::make_pair(boxes, structure_list); + }); + + pybind11::class_( + m, "StructureV2Table") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::StructureV2Table::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::StructureV2Table::GetPostprocessor) + .def("clone", + [](vision::ocr::StructureV2Table &self) { return self.Clone(); }) + .def("predict", + [](vision::ocr::StructureV2Table &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; + }) + .def("batch_predict", [](vision::ocr::StructureV2Table &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + + std::vector ocr_results; + self.BatchPredict(images, &ocr_results); + return ocr_results; + }); + + // Layout + pybind11::class_(m, 
"StructureV2LayoutPreprocessor") + .def(pybind11::init<>()) + .def_property( + "static_shape_infer", + &vision::ocr::StructureV2LayoutPreprocessor::GetStaticShapeInfer, + &vision::ocr::StructureV2LayoutPreprocessor::SetStaticShapeInfer) + .def_property( + "layout_image_shape", + &vision::ocr::StructureV2LayoutPreprocessor::GetLayoutImageShape, + &vision::ocr::StructureV2LayoutPreprocessor::SetLayoutImageShape) + .def("set_normalize", + [](vision::ocr::StructureV2LayoutPreprocessor &self, + const std::vector &mean, const std::vector &std, + bool is_scale) { self.SetNormalize(mean, std, is_scale); }) + .def("run", + [](vision::ocr::StructureV2LayoutPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "StructureV2LayoutPreprocessor."); + } + + auto batch_layout_img_info = self.GetBatchLayoutImgInfo(); + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + + return std::make_pair(outputs, *batch_layout_img_info); + }) + .def("disable_normalize", + [](vision::ocr::StructureV2LayoutPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::ocr::StructureV2LayoutPreprocessor &self) { + self.DisablePermute(); + }); + + pybind11::class_( + m, "StructureV2LayoutPostprocessor") + .def(pybind11::init<>()) + .def_property( + "score_threshold", + &vision::ocr::StructureV2LayoutPostprocessor::GetScoreThreshold, + &vision::ocr::StructureV2LayoutPostprocessor::SetScoreThreshold) + .def_property( + "nms_threshold", + &vision::ocr::StructureV2LayoutPostprocessor::GetNMSThreshold, + &vision::ocr::StructureV2LayoutPostprocessor::SetNMSThreshold) + .def_property("num_class", + &vision::ocr::StructureV2LayoutPostprocessor::GetNumClass, + &vision::ocr::StructureV2LayoutPostprocessor::SetNumClass) + .def_property("fpn_stride", + &vision::ocr::StructureV2LayoutPostprocessor::GetFPNStride, + &vision::ocr::StructureV2LayoutPostprocessor::SetFPNStride) + .def_property("reg_max", + &vision::ocr::StructureV2LayoutPostprocessor::GetRegMax, + &vision::ocr::StructureV2LayoutPostprocessor::SetRegMax) + .def("run", + [](vision::ocr::StructureV2LayoutPostprocessor &self, + std::vector &inputs, + const std::vector> &batch_layout_img_info) { + std::vector results; + + if (!self.Run(inputs, &results, batch_layout_img_info)) { + throw std::runtime_error( + "Failed to postprocess the input data in " + "StructureV2LayoutPostprocessor."); + } + return results; + }); + + pybind11::class_( + m, "StructureV2Layout") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::StructureV2Layout::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::StructureV2Layout::GetPostprocessor) + .def("clone", + [](vision::ocr::StructureV2Layout &self) { return self.Clone(); }) + .def("predict", + [](vision::ocr::StructureV2Layout &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult result; + self.Predict(mat, &result); + return result; + }) + .def("batch_predict", [](vision::ocr::StructureV2Layout &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return 
results; + }); + + pybind11::class_(m, "StructureV2SERViLayoutXLMModel") + .def(pybind11::init()) + .def("clone", + [](vision::ocr::StructureV2SERViLayoutXLMModel &self) { + return self.Clone(); + }) + .def("predict", + [](vision::ocr::StructureV2SERViLayoutXLMModel &self, + pybind11::array &data) { + throw std::runtime_error( + "StructureV2SERViLayoutXLMModel do not support predict."); + }) + .def( + "batch_predict", + [](vision::ocr::StructureV2SERViLayoutXLMModel &self, + std::vector &data) { + throw std::runtime_error( + "StructureV2SERViLayoutXLMModel do not support batch_predict."); + }) + .def("infer", + [](vision::ocr::StructureV2SERViLayoutXLMModel &self, + std::map &data) { + std::vector inputs(data.size()); + int index = 0; + for (auto iter = data.begin(); iter != data.end(); ++iter) { + std::vector data_shape; + data_shape.insert(data_shape.begin(), iter->second.shape(), + iter->second.shape() + iter->second.ndim()); + auto dtype = NumpyDataTypeToFDDataType(iter->second.dtype()); + + inputs[index].Resize(data_shape, dtype); + memcpy(inputs[index].MutableData(), iter->second.mutable_data(), + iter->second.nbytes()); + inputs[index].name = iter->first; + index += 1; + } + + std::vector outputs(self.NumOutputsOfRuntime()); + self.Infer(inputs, &outputs); + + std::vector results; + results.reserve(outputs.size()); + for (size_t i = 0; i < outputs.size(); ++i) { + auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype); + results.emplace_back( + pybind11::array(numpy_dtype, outputs[i].shape)); + memcpy(results[i].mutable_data(), outputs[i].Data(), + outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype)); + } + return results; + }) + .def("get_input_info", + [](vision::ocr::StructureV2SERViLayoutXLMModel &self, int &index) { + return self.InputInfoOfRuntime(index); + }); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_pybind.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_pybind.cc new file mode 100755 index 0000000000..f260decc2c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_pybind.cc @@ -0,0 +1,147 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
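Editor's note: the bindings that follow expose the PP-OCR pipelines to Python. For orientation, this hedged sketch shows how the detector, classifier, and recognizer are composed into the C++ pipeline that those bindings wrap (declared later in this patch in ppocr_v2.h/ppocr_v3.h). The model paths, the dictionary file name, and the Classifier/Recognizer constructor signatures are assumptions, not confirmed by this diff.

#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/ocr/ppocr/ppocr_v3.h"  // pulls in dbdetector/classifier/recognizer

int main() {
  ultrainfer::RuntimeOption option;  // default backend selection

  // Hypothetical exported PP-OCRv3 model directories.
  ultrainfer::vision::ocr::DBDetector det(
      "./ch_PP-OCRv3_det_infer/model.pdmodel",
      "./ch_PP-OCRv3_det_infer/model.pdiparams", option);
  ultrainfer::vision::ocr::Classifier cls(
      "./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel",
      "./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams", option);
  ultrainfer::vision::ocr::Recognizer rec(
      "./ch_PP-OCRv3_rec_infer/model.pdmodel",
      "./ch_PP-OCRv3_rec_infer/model.pdiparams",
      "./ppocr_keys_v1.txt",  // assumed label dictionary file
      option);

  // The pipeline holds non-owning pointers; the classifier is optional.
  ultrainfer::pipeline::PPOCRv3 ocr(&det, &cls, &rec);
  ocr.SetRecBatchSize(6);

  cv::Mat img = cv::imread("doc_image.jpg");
  ultrainfer::vision::OCRResult result;
  if (!ocr.Predict(img, &result)) {
    std::cerr << "OCR pipeline failed." << std::endl;
    return -1;
  }
  for (size_t i = 0; i < result.text.size(); ++i) {
    std::cout << result.text[i] << " (" << result.rec_scores[i] << ")\n";
  }
  return 0;
}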
+#include + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPOCRv4(pybind11::module &m) { + // PPOCRv4 + pybind11::class_(m, "PPOCRv4") + + .def(pybind11::init()) + .def(pybind11::init()) + .def_property("cls_batch_size", &pipeline::PPOCRv4::GetClsBatchSize, + &pipeline::PPOCRv4::SetClsBatchSize) + .def_property("rec_batch_size", &pipeline::PPOCRv4::GetRecBatchSize, + &pipeline::PPOCRv4::SetRecBatchSize) + .def("clone", [](pipeline::PPOCRv4 &self) { return self.Clone(); }) + .def("predict", + [](pipeline::PPOCRv4 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](pipeline::PPOCRv4 &self, std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }); +} +void BindPPOCRv3(pybind11::module &m) { + // PPOCRv3 + pybind11::class_(m, "PPOCRv3") + + .def(pybind11::init()) + .def(pybind11::init()) + .def_property("cls_batch_size", &pipeline::PPOCRv3::GetClsBatchSize, + &pipeline::PPOCRv3::SetClsBatchSize) + .def_property("rec_batch_size", &pipeline::PPOCRv3::GetRecBatchSize, + &pipeline::PPOCRv3::SetRecBatchSize) + .def("clone", [](pipeline::PPOCRv3 &self) { return self.Clone(); }) + .def("predict", + [](pipeline::PPOCRv3 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](pipeline::PPOCRv3 &self, std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }); +} + +void BindPPOCRv2(pybind11::module &m) { + // PPOCRv2 + pybind11::class_(m, "PPOCRv2") + .def(pybind11::init()) + .def(pybind11::init()) + .def_property("cls_batch_size", &pipeline::PPOCRv2::GetClsBatchSize, + &pipeline::PPOCRv2::SetClsBatchSize) + .def_property("rec_batch_size", &pipeline::PPOCRv2::GetRecBatchSize, + &pipeline::PPOCRv2::SetRecBatchSize) + .def("clone", [](pipeline::PPOCRv2 &self) { return self.Clone(); }) + .def("predict", + [](pipeline::PPOCRv2 &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](pipeline::PPOCRv2 &self, std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }); +} + +void BindPPStructureV2Table(pybind11::module &m) { + // PPStructureV2Table + pybind11::class_( + m, "PPStructureV2Table") + .def(pybind11::init()) + .def_property("rec_batch_size", + &pipeline::PPStructureV2Table::GetRecBatchSize, + &pipeline::PPStructureV2Table::SetRecBatchSize) + .def("clone", + [](pipeline::PPStructureV2Table &self) { return self.Clone(); }) + .def("predict", + [](pipeline::PPStructureV2Table &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", [](pipeline::PPStructureV2Table &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, 
&results); + return results; + }); +} + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.cc new file mode 100755 index 0000000000..cd49d1075b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.cc @@ -0,0 +1,186 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/ppocr_v2.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace pipeline { +PPOCRv2::PPOCRv2(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Classifier *cls_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : detector_(det_model), classifier_(cls_model), recognizer_(rec_model) { + Initialized(); + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 32; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); +} + +PPOCRv2::PPOCRv2(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : detector_(det_model), recognizer_(rec_model) { + Initialized(); + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 32; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); +} + +bool PPOCRv2::SetClsBatchSize(int cls_batch_size) { + if (cls_batch_size < -1 || cls_batch_size == 0) { + FDERROR << "batch_size > 0 or batch_size == -1." << std::endl; + return false; + } + cls_batch_size_ = cls_batch_size; + return true; +} + +int PPOCRv2::GetClsBatchSize() { return cls_batch_size_; } + +bool PPOCRv2::SetRecBatchSize(int rec_batch_size) { + if (rec_batch_size < -1 || rec_batch_size == 0) { + FDERROR << "batch_size > 0 or batch_size == -1." 
<< std::endl; + return false; + } + rec_batch_size_ = rec_batch_size; + return true; +} + +int PPOCRv2::GetRecBatchSize() { return rec_batch_size_; } + +bool PPOCRv2::Initialized() const { + + if (detector_ != nullptr && !detector_->Initialized()) { + return false; + } + + if (classifier_ != nullptr && !classifier_->Initialized()) { + return false; + } + + if (recognizer_ != nullptr && !recognizer_->Initialized()) { + return false; + } + return true; +} + +std::unique_ptr PPOCRv2::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PPOCRv2(*this)); + clone_model->detector_ = detector_->Clone().release(); + if (classifier_ != nullptr) { + clone_model->classifier_ = classifier_->Clone().release(); + } + clone_model->recognizer_ = recognizer_->Clone().release(); + return clone_model; +} + +bool PPOCRv2::Predict(cv::Mat *img, ultrainfer::vision::OCRResult *result) { + return Predict(*img, result); +} + +bool PPOCRv2::Predict(const cv::Mat &img, + ultrainfer::vision::OCRResult *result) { + std::vector batch_result(1); + bool success = BatchPredict({img}, &batch_result); + if (!success) { + return success; + } + *result = std::move(batch_result[0]); + return true; +}; + +bool PPOCRv2::BatchPredict( + const std::vector &images, + std::vector *batch_result) { + batch_result->clear(); + batch_result->resize(images.size()); + std::vector>> batch_boxes(images.size()); + + if (!detector_->BatchPredict(images, &batch_boxes)) { + FDERROR << "There's error while detecting image in PPOCR." << std::endl; + return false; + } + + for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) { + vision::ocr::SortBoxes(&(batch_boxes[i_batch])); + (*batch_result)[i_batch].boxes = batch_boxes[i_batch]; + } + + for (int i_batch = 0; i_batch < images.size(); ++i_batch) { + ultrainfer::vision::OCRResult &ocr_result = (*batch_result)[i_batch]; + // Get croped images by detection result + const std::vector> &boxes = ocr_result.boxes; + const cv::Mat &img = images[i_batch]; + std::vector image_list; + if (boxes.size() == 0) { + image_list.emplace_back(img); + } else { + image_list.resize(boxes.size()); + for (size_t i_box = 0; i_box < boxes.size(); ++i_box) { + image_list[i_box] = vision::ocr::GetRotateCropImage(img, boxes[i_box]); + } + } + std::vector *cls_labels_ptr = &ocr_result.cls_labels; + std::vector *cls_scores_ptr = &ocr_result.cls_scores; + + std::vector *text_ptr = &ocr_result.text; + std::vector *rec_scores_ptr = &ocr_result.rec_scores; + + if (nullptr != classifier_) { + for (size_t start_index = 0; start_index < image_list.size(); + start_index += cls_batch_size_) { + size_t end_index = + std::min(start_index + cls_batch_size_, image_list.size()); + if (!classifier_->BatchPredict(image_list, cls_labels_ptr, + cls_scores_ptr, start_index, + end_index)) { + FDERROR << "There's error while recognizing image in PPOCR." 
+ << std::endl; + return false; + } else { + for (size_t i_img = start_index; i_img < end_index; ++i_img) { + if (cls_labels_ptr->at(i_img) % 2 == 1 && + cls_scores_ptr->at(i_img) > + classifier_->GetPostprocessor().GetClsThresh()) { + cv::rotate(image_list[i_img], image_list[i_img], 1); + } + } + } + } + } + + std::vector width_list; + for (int i = 0; i < image_list.size(); i++) { + width_list.push_back(float(image_list[i].cols) / image_list[i].rows); + } + std::vector indices = vision::ocr::ArgSort(width_list); + + for (size_t start_index = 0; start_index < image_list.size(); + start_index += rec_batch_size_) { + size_t end_index = + std::min(start_index + rec_batch_size_, image_list.size()); + if (!recognizer_->BatchPredict(image_list, text_ptr, rec_scores_ptr, + start_index, end_index, indices)) { + FDERROR << "There's error while recognizing image in PPOCR." + << std::endl; + return false; + } + } + } + return true; +} + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.h new file mode 100755 index 0000000000..30de63d98f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v2.h @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/ocr/ppocr/classifier.h" +#include "ultrainfer/vision/ocr/ppocr/dbdetector.h" +#include "ultrainfer/vision/ocr/ppocr/recognizer.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +/** \brief This pipeline can launch detection model, classification model and + * recognition model sequentially. All OCR pipeline APIs are defined inside this + * namespace. + * + */ +namespace pipeline { +/*! @brief PPOCRv2 is used to load PP-OCRv2 series models provided by PaddleOCR. + */ +class ULTRAINFER_DECL PPOCRv2 : public UltraInferModel { +public: + /** \brief Set up the detection model path, classification model path and + * recognition model path respectively. + * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv2_det_infer + * \param[in] cls_model Path of classification model, e.g + * ./ch_ppocr_mobile_v2.0_cls_infer \param[in] rec_model Path of recognition + * model, e.g ./ch_PP-OCRv2_rec_infer + */ + PPOCRv2(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Classifier *cls_model, + ultrainfer::vision::ocr::Recognizer *rec_model); + + /** \brief Classification model is optional, so this function is set up the + * detection model path and recognition model path respectively. 
+ * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv2_det_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv2_rec_infer + */ + PPOCRv2(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model); + + /** \brief Clone a new PPOCRv2 with less memory usage when multiple instances + * of the same model are created + * + * \return new PPOCRv2* type unique pointer + */ + std::unique_ptr Clone() const; + + /** \brief Predict the input image and get OCR result. + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format. \param[in] result The output OCR result will + * be writen to this structure. \return true if the prediction successed, + * otherwise false. + */ + virtual bool Predict(cv::Mat *img, ultrainfer::vision::OCRResult *result); + virtual bool Predict(const cv::Mat &img, + ultrainfer::vision::OCRResult *result); + /** \brief BatchPredict the input image and get OCR result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] batch_result The output + * list of OCR result will be writen to this structure. \return true if the + * prediction successed, otherwise false. + */ + virtual bool + BatchPredict(const std::vector &images, + std::vector *batch_result); + + bool Initialized() const override; + bool SetClsBatchSize(int cls_batch_size); + int GetClsBatchSize(); + bool SetRecBatchSize(int rec_batch_size); + int GetRecBatchSize(); + +protected: + ultrainfer::vision::ocr::DBDetector *detector_ = nullptr; + ultrainfer::vision::ocr::Classifier *classifier_ = nullptr; + ultrainfer::vision::ocr::Recognizer *recognizer_ = nullptr; + +private: + int cls_batch_size_ = 1; + int rec_batch_size_ = 6; +}; + +namespace application { +namespace ocrsystem { +typedef pipeline::PPOCRv2 PPOCRSystemv2; +} // namespace ocrsystem +} // namespace application + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v3.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v3.h new file mode 100755 index 0000000000..5e7ff217d2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v3.h @@ -0,0 +1,87 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/ocr/ppocr/ppocr_v2.h" + +namespace ultrainfer { +/** \brief This pipeline can launch detection model, classification model and + * recognition model sequentially. All OCR pipeline APIs are defined inside this + * namespace. + * + */ +namespace pipeline { +/*! @brief PPOCRv3 is used to load PP-OCRv3 series models provided by PaddleOCR. + */ +class ULTRAINFER_DECL PPOCRv3 : public PPOCRv2 { +public: + /** \brief Set up the detection model path, classification model path and + * recognition model path respectively. 
+ * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv3_det_infer + * \param[in] cls_model Path of classification model, e.g + * ./ch_ppocr_mobile_v2.0_cls_infer \param[in] rec_model Path of recognition + * model, e.g ./ch_PP-OCRv3_rec_infer + */ + PPOCRv3(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Classifier *cls_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : PPOCRv2(det_model, cls_model, rec_model) { + // The only difference between v2 and v3 + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 48; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); + } + /** \brief Classification model is optional, so this function is set up the + * detection model path and recognition model path respectively. + * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv3_det_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv3_rec_infer + */ + PPOCRv3(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : PPOCRv2(det_model, rec_model) { + // The only difference between v2 and v3 + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 48; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); + } + + /** \brief Clone a new PPOCRv3 with less memory usage when multiple instances + * of the same model are created + * + * \return new PPOCRv3* type unique pointer + */ + std::unique_ptr Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PPOCRv3(*this)); + clone_model->detector_ = detector_->Clone().release(); + if (classifier_ != nullptr) { + clone_model->classifier_ = classifier_->Clone().release(); + } + clone_model->recognizer_ = recognizer_->Clone().release(); + return clone_model; + } +}; + +} // namespace pipeline + +namespace application { +namespace ocrsystem { +typedef pipeline::PPOCRv3 PPOCRSystemv3; +} // namespace ocrsystem +} // namespace application + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v4.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v4.h new file mode 100755 index 0000000000..08867b5aa7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppocr_v4.h @@ -0,0 +1,87 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/vision/ocr/ppocr/ppocr_v3.h" + +namespace ultrainfer { +/** \brief This pipeline can launch detection model, classification model and + * recognition model sequentially. All OCR pipeline APIs are defined inside this + * namespace. + * + */ +namespace pipeline { +/*! @brief PPOCRv4 is used to load PP-OCRv4 series models provided by PaddleOCR. + */ +class ULTRAINFER_DECL PPOCRv4 : public PPOCRv3 { +public: + /** \brief Set up the detection model path, classification model path and + * recognition model path respectively. 
+ * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv4_det_infer + * \param[in] cls_model Path of classification model, e.g + * ./ch_ppocr_mobile_v2.0_cls_infer \param[in] rec_model Path of recognition + * model, e.g ./ch_PP-OCRv4_rec_infer + */ + PPOCRv4(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Classifier *cls_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : PPOCRv3(det_model, cls_model, rec_model) { + // The only difference between v2 and v3 + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 48; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); + } + /** \brief Classification model is optional, so this function is set up the + * detection model path and recognition model path respectively. + * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv4_det_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv4_rec_infer + */ + PPOCRv4(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model) + : PPOCRv3(det_model, rec_model) { + // The only difference between v2 and v4 + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 48; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); + } + + /** \brief Clone a new PPOCRv4 with less memory usage when multiple instances + * of the same model are created + * + * \return new PPOCRv4* type unique pointer + */ + std::unique_ptr Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PPOCRv4(*this)); + clone_model->detector_ = detector_->Clone().release(); + if (classifier_ != nullptr) { + clone_model->classifier_ = classifier_->Clone().release(); + } + clone_model->recognizer_ = recognizer_->Clone().release(); + return clone_model; + } +}; + +} // namespace pipeline + +namespace application { +namespace ocrsystem { +typedef pipeline::PPOCRv4 PPOCRSystemv4; +} // namespace ocrsystem +} // namespace application + +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_layout.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_layout.h new file mode 100755 index 0000000000..bffa2e96d4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_layout.h @@ -0,0 +1,40 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
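For reference, a minimal usage sketch of the PP-OCRv2/v3/v4 pipelines declared above. Only the pipeline APIs visible in these headers are relied on; the DBDetector and Classifier constructors are assumed to mirror the Recognizer constructor added later in this patch, and the umbrella header name and model paths are illustrative assumptions, not part of this change.

#include <iostream>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"  // assumed umbrella header; otherwise include the individual ppocr headers

int main() {
  namespace uocr = ultrainfer::vision::ocr;
  ultrainfer::RuntimeOption option;  // defaults to the CPU backends

  // Illustrative model directories; substitute your exported PaddleOCR models.
  uocr::DBDetector det("ch_PP-OCRv4_det_infer/inference.pdmodel",
                       "ch_PP-OCRv4_det_infer/inference.pdiparams", option);
  uocr::Classifier cls("ch_ppocr_mobile_v2.0_cls_infer/inference.pdmodel",
                       "ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams", option);
  uocr::Recognizer rec("ch_PP-OCRv4_rec_infer/inference.pdmodel",
                       "ch_PP-OCRv4_rec_infer/inference.pdiparams",
                       "ppocr_keys_v1.txt", option);

  // The classifier is optional; the two-argument constructor skips it.
  ultrainfer::pipeline::PPOCRv4 ppocr(&det, &cls, &rec);
  ppocr.SetClsBatchSize(1);
  ppocr.SetRecBatchSize(6);

  cv::Mat im = cv::imread("demo.jpg");
  ultrainfer::vision::OCRResult result;
  if (!ppocr.Predict(im, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  for (size_t i = 0; i < result.text.size(); ++i) {
    std::cout << result.text[i] << " (" << result.rec_scores[i] << ")" << std::endl;
  }
  return 0;
}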
+ +#pragma once + +#include + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout.h" + +namespace ultrainfer { + +namespace pipeline { +typedef ultrainfer::vision::ocr::StructureV2Layout PPStructureV2Layout; + +namespace application { +namespace ocrsystem { + +// TODO(qiuyanjun): This pipeline may not need +typedef pipeline::PPStructureV2Layout PPStructureV2LayoutSystem; +} // namespace ocrsystem +} // namespace application + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.cc new file mode 100755 index 0000000000..d163878322 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.cc @@ -0,0 +1,233 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace pipeline { +PPStructureV2Table::PPStructureV2Table( + ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model, + ultrainfer::vision::ocr::StructureV2Table *table_model) + : detector_(det_model), recognizer_(rec_model), table_(table_model) { + Initialized(); +} + +bool PPStructureV2Table::SetRecBatchSize(int rec_batch_size) { + if (rec_batch_size < -1 || rec_batch_size == 0) { + FDERROR << "batch_size > 0 or batch_size == -1." 
<< std::endl; + return false; + } + rec_batch_size_ = rec_batch_size; + return true; +} + +int PPStructureV2Table::GetRecBatchSize() { return rec_batch_size_; } + +bool PPStructureV2Table::Initialized() const { + if (detector_ != nullptr && !detector_->Initialized()) { + return false; + } + + if (recognizer_ != nullptr && !recognizer_->Initialized()) { + return false; + } + + if (table_ != nullptr && !table_->Initialized()) { + return false; + } + return true; +} + +std::unique_ptr PPStructureV2Table::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PPStructureV2Table(*this)); + clone_model->detector_ = detector_->Clone().release(); + clone_model->recognizer_ = recognizer_->Clone().release(); + clone_model->table_ = table_->Clone().release(); + return clone_model; +} + +bool PPStructureV2Table::Predict(cv::Mat *img, + ultrainfer::vision::OCRResult *result) { + return Predict(*img, result); +} + +bool PPStructureV2Table::Predict(const cv::Mat &img, + ultrainfer::vision::OCRResult *result) { + std::vector batch_result(1); + bool success = BatchPredict({img}, &batch_result); + if (!success) { + return success; + } + *result = std::move(batch_result[0]); + return true; +}; + +bool PPStructureV2Table::BatchPredict( + const std::vector &images, + std::vector *batch_result) { + batch_result->clear(); + batch_result->resize(images.size()); + std::vector>> batch_boxes(images.size()); + + if (!detector_->BatchPredict(images, &batch_boxes)) { + FDERROR << "There's error while detecting image in PPOCR." << std::endl; + return false; + } + + for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) { + vision::ocr::SortBoxes(&(batch_boxes[i_batch])); + (*batch_result)[i_batch].boxes = batch_boxes[i_batch]; + } + + for (int i_batch = 0; i_batch < images.size(); ++i_batch) { + ultrainfer::vision::OCRResult &ocr_result = (*batch_result)[i_batch]; + // Get croped images by detection result + const std::vector> &boxes = ocr_result.boxes; + const cv::Mat &img = images[i_batch]; + std::vector image_list; + if (boxes.size() == 0) { + image_list.emplace_back(img); + } else { + image_list.resize(boxes.size()); + for (size_t i_box = 0; i_box < boxes.size(); ++i_box) { + image_list[i_box] = vision::ocr::GetRotateCropImage(img, boxes[i_box]); + } + } + std::vector *cls_labels_ptr = &ocr_result.cls_labels; + std::vector *cls_scores_ptr = &ocr_result.cls_scores; + + std::vector *text_ptr = &ocr_result.text; + std::vector *rec_scores_ptr = &ocr_result.rec_scores; + + std::vector width_list; + for (int i = 0; i < image_list.size(); i++) { + width_list.push_back(float(image_list[i].cols) / image_list[i].rows); + } + std::vector indices = vision::ocr::ArgSort(width_list); + + for (size_t start_index = 0; start_index < image_list.size(); + start_index += rec_batch_size_) { + size_t end_index = + std::min(start_index + rec_batch_size_, image_list.size()); + if (!recognizer_->BatchPredict(image_list, text_ptr, rec_scores_ptr, + start_index, end_index, indices)) { + FDERROR << "There's error while recognizing image in PPOCR." + << std::endl; + return false; + } + } + } + + if (!table_->BatchPredict(images, batch_result)) { + FDERROR << "There's error while recognizing tables in images." 
<< std::endl; + return false; + } + + for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) { + ultrainfer::vision::OCRResult &ocr_result = (*batch_result)[i_batch]; + std::vector> matched(ocr_result.table_boxes.size(), + std::vector()); + + std::vector ocr_box; + std::vector structure_box; + for (int i = 0; i < ocr_result.boxes.size(); i++) { + ocr_box = vision::ocr::Xyxyxyxy2Xyxy(ocr_result.boxes[i]); + ocr_box[0] -= 1; + ocr_box[1] -= 1; + ocr_box[2] += 1; + ocr_box[3] += 1; + + std::vector> dis_list(ocr_result.table_boxes.size(), + std::vector(3, 100000.0)); + + for (int j = 0; j < ocr_result.table_boxes.size(); j++) { + structure_box = vision::ocr::Xyxyxyxy2Xyxy(ocr_result.table_boxes[j]); + dis_list[j][0] = vision::ocr::Dis(ocr_box, structure_box); + dis_list[j][1] = 1 - vision::ocr::Iou(ocr_box, structure_box); + dis_list[j][2] = j; + } + + // find min dis idx + std::sort(dis_list.begin(), dis_list.end(), vision::ocr::ComparisonDis); + matched[dis_list[0][2]].push_back(ocr_result.text[i]); + } + + // get pred html + std::string html_str = ""; + int td_tag_idx = 0; + auto structure_html_tags = ocr_result.table_structure; + for (int i = 0; i < structure_html_tags.size(); i++) { + if (structure_html_tags[i].find("") != std::string::npos) { + if (structure_html_tags[i].find("") != std::string::npos) { + html_str += ""; + } + if (matched[td_tag_idx].size() > 0) { + bool b_with = false; + if (matched[td_tag_idx][0].find("") != std::string::npos && + matched[td_tag_idx].size() > 1) { + b_with = true; + html_str += ""; + } + for (int j = 0; j < matched[td_tag_idx].size(); j++) { + std::string content = matched[td_tag_idx][j]; + if (matched[td_tag_idx].size() > 1) { + // remove blank, and + if (content.length() > 0 && content.at(0) == ' ') { + content = content.substr(0); + } + if (content.length() > 2 && content.substr(0, 3) == "") { + content = content.substr(3); + } + if (content.length() > 4 && + content.substr(content.length() - 4) == "") { + content = content.substr(0, content.length() - 4); + } + if (content.empty()) { + continue; + } + // add blank + if (j != matched[td_tag_idx].size() - 1 && + content.at(content.length() - 1) != ' ') { + content += ' '; + } + } + html_str += content; + } + if (b_with) { + html_str += ""; + } + } + if (structure_html_tags[i].find("") != std::string::npos) { + html_str += ""; + } else { + html_str += structure_html_tags[i]; + } + td_tag_idx += 1; + } else { + html_str += structure_html_tags[i]; + } + } + (*batch_result)[i_batch].table_html = html_str; + } + + return true; +} + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h new file mode 100755 index 0000000000..9cc6f7fb88 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/ppstructurev2_table.h @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
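The table pipeline above merges the det/rec OCR branch with the SLANet structure branch and writes the reconstructed HTML into result.table_html. A hedged usage sketch, written as a hypothetical helper so the component-model constructors (which are not part of this hunk) stay out of scope:

#include <iostream>
#include <string>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"  // assumed umbrella header

// Hypothetical helper: det, rec and table are constructed elsewhere.
bool RecognizeTable(ultrainfer::vision::ocr::DBDetector *det,
                    ultrainfer::vision::ocr::Recognizer *rec,
                    ultrainfer::vision::ocr::StructureV2Table *table,
                    const std::string &image_path) {
  ultrainfer::pipeline::PPStructureV2Table pipe(det, rec, table);
  pipe.SetRecBatchSize(6);

  cv::Mat im = cv::imread(image_path);
  ultrainfer::vision::OCRResult result;
  if (!pipe.Predict(im, &result)) {
    return false;
  }
  // Cell texts come from the det/rec branch, the table layout from the
  // structure branch; both are merged into the HTML string.
  std::cout << result.table_html << std::endl;
  return true;
}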
+ +#pragma once + +#include + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/ocr/ppocr/dbdetector.h" +#include "ultrainfer/vision/ocr/ppocr/recognizer.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_table.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +/** \brief This pipeline can launch detection model, classification model and + * recognition model sequentially. All OCR pipeline APIs are defined inside this + * namespace. + * + */ +namespace pipeline { +/*! @brief PPStructureV2Table is used to load PP-OCRv2 series models provided by + * PaddleOCR. + */ +class ULTRAINFER_DECL PPStructureV2Table : public UltraInferModel { +public: + /** \brief Set up the detection model path, recognition model path and table + * model path respectively. + * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv2_det_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv2_rec_infer + * \param[in] table_model Path of table recognition model, e.g + * ./en_ppstructure_mobile_v2.0_SLANet_infer + */ + PPStructureV2Table(ultrainfer::vision::ocr::DBDetector *det_model, + ultrainfer::vision::ocr::Recognizer *rec_model, + ultrainfer::vision::ocr::StructureV2Table *table_model); + + /** \brief Clone a new PPStructureV2Table with less memory usage when multiple + * instances of the same model are created + * + * \return new PPStructureV2Table* type unique pointer + */ + std::unique_ptr Clone() const; + + /** \brief Predict the input image and get OCR result. + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format. \param[in] result The output OCR result will + * be writen to this structure. \return true if the prediction successed, + * otherwise false. + */ + virtual bool Predict(cv::Mat *img, ultrainfer::vision::OCRResult *result); + virtual bool Predict(const cv::Mat &img, + ultrainfer::vision::OCRResult *result); + /** \brief BatchPredict the input image and get OCR result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] batch_result The output + * list of OCR result will be writen to this structure. \return true if the + * prediction successed, otherwise false. + */ + virtual bool + BatchPredict(const std::vector &images, + std::vector *batch_result); + + bool Initialized() const override; + bool SetRecBatchSize(int rec_batch_size); + int GetRecBatchSize(); + +protected: + ultrainfer::vision::ocr::DBDetector *detector_ = nullptr; + ultrainfer::vision::ocr::Recognizer *recognizer_ = nullptr; + ultrainfer::vision::ocr::StructureV2Table *table_ = nullptr; + +private: + int rec_batch_size_ = 6; +}; + +namespace application { +namespace ocrsystem { +typedef pipeline::PPStructureV2Table PPStructureV2TableSystem; +} // namespace ocrsystem +} // namespace application + +} // namespace pipeline +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.cc new file mode 100755 index 0000000000..8790a5deee --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.cc @@ -0,0 +1,150 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/rec_postprocessor.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +std::vector ReadDict(const std::string &path) { + std::ifstream in(path); + FDASSERT(in, "Cannot open file %s to read.", path.c_str()); + std::string line; + std::vector m_vec; + while (getline(in, line)) { + m_vec.push_back(line); + } + m_vec.insert(m_vec.begin(), "#"); // blank char for ctc + m_vec.push_back(" "); + return m_vec; +} + +RecognizerPostprocessor::RecognizerPostprocessor() { initialized_ = false; } + +RecognizerPostprocessor::RecognizerPostprocessor( + const std::string &label_path) { + // init label_lsit + label_list_ = ReadDict(label_path); + initialized_ = true; +} + +bool RecognizerPostprocessor::SingleBatchPostprocessor( + const float *out_data, const std::vector &output_shape, + std::string *text, float *rec_score) { + std::string &str_res = *text; + float &score = *rec_score; + score = 0.f; + int argmax_idx; + int last_index = 0; + int count = 0; + float max_value = 0.0f; + + for (int n = 0; n < output_shape[1]; n++) { + argmax_idx = int( + std::distance(&out_data[n * output_shape[2]], + std::max_element(&out_data[n * output_shape[2]], + &out_data[(n + 1) * output_shape[2]]))); + + max_value = float(*std::max_element(&out_data[n * output_shape[2]], + &out_data[(n + 1) * output_shape[2]])); + + if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) { + score += max_value; + count += 1; + if (argmax_idx > label_list_.size()) { + FDERROR << "The output index: " << argmax_idx + << " is larger than the size of label_list: " + << label_list_.size() << ". Please check the label file!" + << std::endl; + return false; + } + str_res += label_list_[argmax_idx]; + } + last_index = argmax_idx; + } + score /= (count + 1e-6); + if (count == 0 || std::isnan(score)) { + score = 0.f; + } + return true; +} + +bool RecognizerPostprocessor::Run(const std::vector &tensors, + std::vector *texts, + std::vector *rec_scores) { + // Recognizer have only 1 output tensor. + // For Recognizer, the output tensor shape = [batch, ?, 6625] + size_t total_size = tensors[0].shape[0]; + return Run(tensors, texts, rec_scores, 0, total_size, {}); +} + +bool RecognizerPostprocessor::Run(const std::vector &tensors, + std::vector *texts, + std::vector *rec_scores, + size_t start_index, size_t total_size, + const std::vector &indices) { + if (!initialized_) { + FDERROR << "Postprocessor is not initialized." << std::endl; + return false; + } + + // Recognizer have only 1 output tensor. + const FDTensor &tensor = tensors[0]; + // For Recognizer, the output tensor shape = [batch, ?, 6625] + size_t batch = tensor.shape[0]; + size_t length = accumulate(tensor.shape.begin() + 1, tensor.shape.end(), 1, + std::multiplies()); + + if (batch <= 0) { + FDERROR << "The infer outputTensor.shape[0] <=0, wrong infer result." 
+ << std::endl; + return false; + } + if (start_index < 0 || total_size <= 0) { + FDERROR << "start_index or total_size error. Correct is: 0 <= start_index " + "< total_size" + << std::endl; + return false; + } + if ((start_index + batch) > total_size) { + FDERROR << "start_index or total_size error. Correct is: start_index + " + "batch(outputTensor.shape[0]) <= total_size" + << std::endl; + return false; + } + texts->resize(total_size); + rec_scores->resize(total_size); + + const float *tensor_data = reinterpret_cast(tensor.Data()); + for (int i_batch = 0; i_batch < batch; ++i_batch) { + size_t real_index = i_batch + start_index; + if (indices.size() != 0) { + real_index = indices[i_batch + start_index]; + } + if (!SingleBatchPostprocessor(tensor_data + i_batch * length, tensor.shape, + &texts->at(real_index), + &rec_scores->at(real_index))) { + return false; + } + } + + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.h new file mode 100755 index 0000000000..9b21a61837 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_postprocessor.h @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Postprocessor object for Recognizer serials model. 
+ */ +class ULTRAINFER_DECL RecognizerPostprocessor { +public: + RecognizerPostprocessor(); + /** \brief Create a postprocessor instance for Recognizer serials model + * + * \param[in] label_path The path of label_dict + */ + explicit RecognizerPostprocessor(const std::string &label_path); + + /** \brief Process the result of runtime and fill to RecognizerResult + * + * \param[in] tensors The inference result from runtime + * \param[in] texts The output text results of recognizer + * \param[in] rec_scores The output score results of recognizer + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *texts, std::vector *rec_scores); + + bool Run(const std::vector &tensors, + std::vector *texts, std::vector *rec_scores, + size_t start_index, size_t total_size, + const std::vector &indices); + +private: + bool SingleBatchPostprocessor(const float *out_data, + const std::vector &output_shape, + std::string *text, float *rec_score); + bool initialized_ = false; + std::vector label_list_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.cc new file mode 100755 index 0000000000..4c49887caf --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.cc @@ -0,0 +1,142 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
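The recognizer postprocessor above is a greedy CTC decode: per time step it takes the argmax over the dictionary, drops the blank (index 0, the "#" prepended by ReadDict) and consecutive repeats, and averages the kept probabilities. A minimal standalone sketch of the same rule (a hypothetical helper mirroring SingleBatchPostprocessor, not part of this patch):

#include <cmath>
#include <string>
#include <utility>
#include <vector>

// probs has shape [T, C]; labels[0] is the CTC blank.
std::pair<std::string, float> CtcGreedyDecode(
    const std::vector<std::vector<float>> &probs,
    const std::vector<std::string> &labels) {
  std::string text;
  float score = 0.f;
  int count = 0;
  int last_index = 0;
  for (size_t t = 0; t < probs.size(); ++t) {
    int argmax_idx = 0;
    for (size_t c = 1; c < probs[t].size(); ++c) {
      if (probs[t][c] > probs[t][argmax_idx]) argmax_idx = static_cast<int>(c);
    }
    // Keep the character only if it is not the blank and not a repeat of the
    // previous time step.
    if (argmax_idx > 0 && !(t > 0 && argmax_idx == last_index)) {
      score += probs[t][argmax_idx];
      ++count;
      text += labels[argmax_idx];
    }
    last_index = argmax_idx;
  }
  score /= (count + 1e-6f);
  if (count == 0 || std::isnan(score)) score = 0.f;
  return {text, score};
}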
+ +#include "ultrainfer/vision/ocr/ppocr/rec_preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +RecognizerPreprocessor::RecognizerPreprocessor() { + resize_op_ = std::make_shared(-1, -1); + + std::vector value = {127, 127, 127}; + pad_op_ = std::make_shared(0, 0, 0, 0, value); + + std::vector mean = {0.5f, 0.5f, 0.5f}; + std::vector std = {0.5f, 0.5f, 0.5f}; + normalize_permute_op_ = + std::make_shared(mean, std, true); + normalize_op_ = std::make_shared(mean, std, true); + hwc2chw_op_ = std::make_shared(); + cast_op_ = std::make_shared("float"); +} + +void RecognizerPreprocessor::OcrRecognizerResizeImage( + FDMat *mat, float max_wh_ratio, const std::vector &rec_image_shape, + bool static_shape_infer) { + int img_h, img_w; + img_h = rec_image_shape[1]; + img_w = rec_image_shape[2]; + + if (!static_shape_infer) { + img_w = int(img_h * max_wh_ratio); + float ratio = float(mat->Width()) / float(mat->Height()); + + int resize_w; + if (ceilf(img_h * ratio) > img_w) { + resize_w = img_w; + } else { + resize_w = int(ceilf(img_h * ratio)); + } + resize_op_->SetWidthAndHeight(resize_w, img_h); + (*resize_op_)(mat); + pad_op_->SetPaddingSize(0, 0, 0, int(img_w - mat->Width())); + (*pad_op_)(mat); + } else { + if (mat->Width() >= img_w) { + // Reszie W to 320 + resize_op_->SetWidthAndHeight(img_w, img_h); + (*resize_op_)(mat); + } else { + resize_op_->SetWidthAndHeight(mat->Width(), img_h); + (*resize_op_)(mat); + // Pad to 320 + pad_op_->SetPaddingSize(0, 0, 0, int(img_w - mat->Width())); + (*pad_op_)(mat); + } + } +} + +bool RecognizerPreprocessor::Run(std::vector *images, + std::vector *outputs, + size_t start_index, size_t end_index, + const std::vector &indices) { + if (images->size() == 0 || end_index <= start_index || + end_index > images->size()) { + FDERROR << "images->size() or index error. Correct is: 0 <= start_index < " + "end_index <= images->size()" + << std::endl; + return false; + } + + std::vector mats(end_index - start_index); + for (size_t i = start_index; i < end_index; ++i) { + size_t real_index = i; + if (indices.size() != 0) { + real_index = indices[i]; + } + mats[i - start_index] = images->at(real_index); + } + return Run(&mats, outputs); +} + +bool RecognizerPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + int img_h = rec_image_shape_[1]; + int img_w = rec_image_shape_[2]; + float max_wh_ratio = img_w * 1.0 / img_h; + float ori_wh_ratio; + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + ori_wh_ratio = mat->Width() * 1.0 / mat->Height(); + max_wh_ratio = std::max(max_wh_ratio, ori_wh_ratio); + } + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, + static_shape_infer_); + } + + if (!disable_normalize_ && !disable_permute_) { + (*normalize_permute_op_)(image_batch); + } else { + if (!disable_normalize_) { + (*normalize_op_)(image_batch); + } + if (!disable_permute_) { + (*hwc2chw_op_)(image_batch); + (*cast_op_)(image_batch); + } + } + + // Only have 1 output Tensor. 
+ outputs->resize(1); + // Get the NCHW tensor + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.h new file mode 100755 index 0000000000..58fd675468 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/rec_preprocessor.h @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Preprocessor object for PaddleClas serials model. + */ +class ULTRAINFER_DECL RecognizerPreprocessor : public ProcessorManager { +public: + RecognizerPreprocessor(); + using ProcessorManager::Run; + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input data list, all the elements are FDMat + * \param[in] outputs The output tensors which will be fed into runtime + * \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + size_t start_index, size_t end_index, + const std::vector &indices); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Set static_shape_infer is true or not. When deploy PP-OCR + /// on hardware which can not support dynamic input shape very well, + /// like Huawei Ascned, static_shape_infer needs to to be true. + void SetStaticShapeInfer(bool static_shape_infer) { + static_shape_infer_ = static_shape_infer; + } + /// Get static_shape_infer of the recognition preprocess + bool GetStaticShapeInfer() const { return static_shape_infer_; } + + /// Set preprocess normalize parameters, please call this API to customize + /// the normalize parameters, otherwise it will use the default normalize + /// parameters. 
+  void SetNormalize(const std::vector<float> &mean,
+                    const std::vector<float> &std, bool is_scale) {
+    normalize_permute_op_ =
+        std::make_shared<NormalizeAndPermute>(mean, std, is_scale);
+    normalize_op_ = std::make_shared<Normalize>(mean, std, is_scale);
+  }
+
+  /// Set rec_image_shape for the recognition preprocess
+  void SetRecImageShape(const std::vector<int> &rec_image_shape) {
+    rec_image_shape_ = rec_image_shape;
+  }
+  /// Get rec_image_shape for the recognition preprocess
+  std::vector<int> GetRecImageShape() { return rec_image_shape_; }
+
+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+  /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
+private:
+  void OcrRecognizerResizeImage(FDMat *mat, float max_wh_ratio,
+                                const std::vector<int> &rec_image_shape,
+                                bool static_shape_infer);
+  // for recording the switch of hwc2chw
+  bool disable_permute_ = false;
+  // for recording the switch of normalize
+  bool disable_normalize_ = false;
+  std::vector<int> rec_image_shape_ = {3, 48, 320};
+  bool static_shape_infer_ = false;
+  std::shared_ptr<Resize> resize_op_;
+  std::shared_ptr<Pad> pad_op_;
+  std::shared_ptr<NormalizeAndPermute> normalize_permute_op_;
+  std::shared_ptr<Normalize> normalize_op_;
+  std::shared_ptr<HWC2CHW> hwc2chw_op_;
+  std::shared_ptr<Cast> cast_op_;
+};
+
+} // namespace ocr
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.cc
new file mode 100755
index 0000000000..47dcdc283e
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.cc
@@ -0,0 +1,136 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
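To make the recognizer preprocessor's dynamic resize concrete: every crop in a batch is scaled to the model height (48 by default), then right-padded with value 127 up to one common width derived from the widest aspect ratio in the batch; with static_shape_infer enabled the width is pinned to rec_image_shape_[2] instead (useful on hardware such as Ascend). A small arithmetic sketch of the common-width computation, mirroring Apply() and OcrRecognizerResizeImage above (a hypothetical helper, not part of this patch):

#include <algorithm>
#include <utility>
#include <vector>

// crop_wh holds (width, height) for each crop of one batch;
// rec_image_shape is {C, H, W}, e.g. {3, 48, 320}.
int BatchTargetWidth(const std::vector<std::pair<int, int>> &crop_wh,
                     const std::vector<int> &rec_image_shape) {
  const int img_h = rec_image_shape[1];
  float max_wh_ratio =
      static_cast<float>(rec_image_shape[2]) / static_cast<float>(img_h);
  for (const auto &wh : crop_wh) {
    max_wh_ratio = std::max(
        max_wh_ratio,
        static_cast<float>(wh.first) / static_cast<float>(wh.second));
  }
  // Every crop is resized to height img_h (keeping its aspect ratio) and
  // right-padded up to this common width before batching.
  return static_cast<int>(img_h * max_wh_ratio);
}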
+ +#include "ultrainfer/vision/ocr/ppocr/recognizer.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +Recognizer::Recognizer() {} + +Recognizer::Recognizer(const std::string &model_file, + const std::string ¶ms_file, + const std::string &label_path, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : postprocessor_(label_path) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +// Init +bool Recognizer::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + + return true; +} + +std::unique_ptr Recognizer::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(Recognizer(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool Recognizer::Predict(const cv::Mat &img, std::string *text, + float *rec_score) { + std::vector texts(1); + std::vector rec_scores(1); + bool success = BatchPredict({img}, &texts, &rec_scores); + if (!success) { + return success; + } + *text = std::move(texts[0]); + *rec_score = rec_scores[0]; + return true; +} + +bool Recognizer::Predict(const cv::Mat &img, vision::OCRResult *ocr_result) { + ocr_result->text.resize(1); + ocr_result->rec_scores.resize(1); + if (!Predict(img, &(ocr_result->text[0]), &(ocr_result->rec_scores[0]))) { + return false; + } + return true; +} + +bool Recognizer::BatchPredict(const std::vector &images, + std::vector *texts, + std::vector *rec_scores) { + return BatchPredict(images, texts, rec_scores, 0, images.size(), {}); +} + +bool Recognizer::BatchPredict(const std::vector &images, + vision::OCRResult *ocr_result) { + return BatchPredict(images, &(ocr_result->text), &(ocr_result->rec_scores)); +} + +bool Recognizer::BatchPredict(const std::vector &images, + std::vector *texts, + std::vector *rec_scores, + size_t start_index, size_t end_index, + const std::vector &indices) { + size_t total_size = images.size(); + if (indices.size() != 0 && indices.size() != total_size) { + FDERROR << "indices.size() should be 0 or images.size()." << std::endl; + return false; + } + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, + end_index, indices)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores, + start_index, total_size, indices)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." 
+ << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.h new file mode 100755 index 0000000000..e156647665 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/recognizer.h @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/rec_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/rec_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { +/*! @brief Recognizer object is used to load the recognition model provided by + * PaddleOCR. + */ +class ULTRAINFER_DECL Recognizer : public UltraInferModel { +public: + Recognizer(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./ch_PP-OCRv3_rec_infer/model.pdmodel. \param[in] params_file Path of + * parameter file, e.g ./ch_PP-OCRv3_rec_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] label_path Path + * of label file used by OCR recognition model. e.g ./ppocr_keys_v1.txt + * \param[in] custom_option RuntimeOption for inference, the default will use + * cpu, and choose the backend defined in `valid_cpu_backends`. \param[in] + * model_format Model format of the loaded model, default is Paddle format. + */ + Recognizer(const std::string &model_file, const std::string ¶ms_file = "", + const std::string &label_path = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /// Get model's name + std::string ModelName() const { return "ppocr/ocr_rec"; } + + /** \brief Clone a new Recognizer with less memory usage when multiple + * instances of the same model are created + * + * \return new Recognizer* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /** \brief Predict the input image and get OCR recognition model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] text The text result of rec + * model will be written into this parameter. \param[in] rec_score The sccore + * result of rec model will be written into this parameter. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, std::string *text, float *rec_score); + + /** \brief Predict the input image and get OCR recognition model result. 
+ * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * recognition model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR recognition model result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] ocr_result The output + * of OCR recognition model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR recognition model result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] texts The list of text + * results of rec model will be written into this vector. \param[in] + * rec_scores The list of sccore result of rec model will be written into this + * vector. \return true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *texts, + std::vector *rec_scores); + + virtual bool BatchPredict(const std::vector &images, + std::vector *texts, + std::vector *rec_scores, size_t start_index, + size_t end_index, const std::vector &indices); + + /// Get preprocessor reference of DBDetectorPreprocessor + virtual RecognizerPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of DBDetectorPostprocessor + virtual RecognizerPostprocessor &GetPostprocessor() { return postprocessor_; } + +private: + bool Initialize(); + RecognizerPreprocessor preprocessor_; + RecognizerPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.cc new file mode 100755 index 0000000000..a16be5aaa2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.cc @@ -0,0 +1,102 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
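The Clone() entry points above are meant for running several instances of the same recognizer (for example, one per worker thread) while sharing the loaded weights, instead of reloading the model. A hedged sketch, assuming a recognizer has already been constructed as in the pipeline example earlier; the worker function itself is hypothetical:

#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision.h"  // assumed umbrella header

// Each worker clones the shared recognizer and decodes its own batch of
// pre-cropped text lines.
void RecognizeOnWorker(const ultrainfer::vision::ocr::Recognizer &rec,
                       const std::vector<cv::Mat> &crops) {
  std::unique_ptr<ultrainfer::vision::ocr::Recognizer> local = rec.Clone();
  std::vector<std::string> texts;
  std::vector<float> scores;
  if (local->BatchPredict(crops, &texts, &scores)) {
    for (size_t i = 0; i < texts.size(); ++i) {
      std::cout << texts[i] << " (" << scores[i] << ")" << std::endl;
    }
  }
}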
+ +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2Layout::StructureV2Layout() {} +StructureV2Layout::StructureV2Layout(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool StructureV2Layout::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +std::unique_ptr StructureV2Layout::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(StructureV2Layout(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool StructureV2Layout::Predict(cv::Mat *im, DetectionResult *result) { + return Predict(*im, result); +} + +bool StructureV2Layout::Predict(const cv::Mat &im, DetectionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool StructureV2Layout::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + auto batch_layout_img_info = preprocessor_.GetBatchLayoutImgInfo(); + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, + *batch_layout_img_info)) { + FDERROR << "Failed to postprocess the inference results." << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.h new file mode 100755 index 0000000000..132cd183f5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout.h @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { +/*! @brief StructureV2Layout object is used to load the PP-StructureV2-Layout + * detection model. + */ +class ULTRAINFER_DECL StructureV2Layout : public UltraInferModel { +public: + StructureV2Layout(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./picodet_lcnet_x1_0_fgd_layout_cdla_infer/model.pdmodel. \param[in] + * params_file Path of parameter file, e.g + * ./picodet_lcnet_x1_0_fgd_layout_cdla_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends`. \param[in] model_format Model + * format of the loaded model, default is Paddle format. + */ + StructureV2Layout(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new StructureV2Layout with less memory usage when multiple + * instances of the same model are created + * + * \return newStructureV2Layout* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "pp-structurev2-layout"; } + + /** \brief DEPRECATED Predict the detection result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, DetectionResult *result); + + /** \brief Predict the detection result for an input image + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output detection result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, DetectionResult *result); + + /** \brief Predict the detection result for an input image list + * \param[in] im The input image list, all the elements come from + * cv::imread(), is a 3-D array with layout HWC, BGR format \param[in] results + * The output detection result list \return true if the prediction successed, + * otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference ofStructureV2LayoutPreprocessor + virtual StructureV2LayoutPreprocessor &GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference ofStructureV2LayoutPostprocessor + virtual StructureV2LayoutPostprocessor &GetPostprocessor() { + return postprocessor_; + } + +private: + bool Initialize(); + StructureV2LayoutPreprocessor preprocessor_; + StructureV2LayoutPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // 
namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.cc new file mode 100755 index 0000000000..b5a3385821 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.cc @@ -0,0 +1,174 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +bool StructureV2LayoutPostprocessor::Run( + const std::vector &tensors, std::vector *results, + const std::vector> &batch_layout_img_info) { + // A StructureV2Layout has 8 output tensors on which it then runs + // a GFL regression (namely, DisPred2Box), reference: + // PaddleOCR/blob/release/2.6/deploy/cpp_infer/src/postprocess_op.cpp#L511 + int tensor_size = tensors.size(); + FDASSERT(tensor_size == 8, + "StructureV2Layout should has 8 output tensors," + "but got %d now!", + tensor_size) + FDASSERT((tensor_size / 2) == fpn_stride_.size(), + "found (tensor_size / 2) != fpn_stride_.size() !") + // TODO(qiuyanjun): may need to reorder the tensors according to + // fpn_stride_ and the shape of output tensors. + size_t batch = tensors[0].Shape()[0]; // [batch, ...] 
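+  // Tensor ordering assumed here (see the FDASSERTs and TODO above): the first
+  // fpn_stride_.size() tensors are per-level class score maps with shape
+  // [batch, H*W, num_class_], and the following fpn_stride_.size() tensors are
+  // the matching GFL box-distribution maps with shape [batch, H*W, 4 * reg_max_],
+  // which is why reg_max_ is recovered from Shape()[2] / 4 below.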
+ + results->resize(batch); + SetRegMax(tensors[fpn_stride_.size()].Shape()[2] / 4); + for (int batch_idx = 0; batch_idx < batch; ++batch_idx) { + std::vector single_batch_tensors(8); + SetSingleBatchExternalData(tensors, single_batch_tensors, batch_idx); + SingleBatchPostprocessor(single_batch_tensors, + batch_layout_img_info[batch_idx], + &results->at(batch_idx)); + } + return true; +} + +void StructureV2LayoutPostprocessor::SetSingleBatchExternalData( + const std::vector &tensors, + std::vector &single_batch_tensors, size_t batch_idx) { + single_batch_tensors.resize(tensors.size()); + for (int j = 0; j < tensors.size(); ++j) { + auto j_shape = tensors[j].Shape(); + j_shape[0] = 1; // process b=1 per loop + size_t j_step = + accumulate(j_shape.begin(), j_shape.end(), 1, std::multiplies()); + const float *j_data_ptr = + reinterpret_cast(tensors[j].Data()); + const float *j_start_ptr = j_data_ptr + j_step * batch_idx; + single_batch_tensors[j].SetExternalData( + j_shape, tensors[j].Dtype(), + const_cast(reinterpret_cast(j_start_ptr)), + tensors[j].device, tensors[j].device_id); + } +} + +bool StructureV2LayoutPostprocessor::SingleBatchPostprocessor( + const std::vector &single_batch_tensors, + const std::array &layout_img_info, DetectionResult *result) { + FDASSERT(single_batch_tensors.size() == 8, + "StructureV2Layout should has 8 output tensors," + "but got %d now!", + static_cast(single_batch_tensors.size())) + // layout_img_info: {image width, image height, resize width, resize height} + int img_w = layout_img_info[0]; + int img_h = layout_img_info[1]; + int in_w = layout_img_info[2]; + int in_h = layout_img_info[3]; + float scale_factor_w = static_cast(in_w) / static_cast(img_w); + float scale_factor_h = static_cast(in_h) / static_cast(img_h); + + std::vector bbox_results; + bbox_results.resize(num_class_); // tmp result for each class + + // decode score, label, box + for (int i = 0; i < fpn_stride_.size(); ++i) { + int feature_h = std::ceil(static_cast(in_h) / fpn_stride_[i]); + int feature_w = std::ceil(static_cast(in_w) / fpn_stride_[i]); + const FDTensor &prob_tensor = single_batch_tensors[i]; + const FDTensor &bbox_tensor = single_batch_tensors[i + fpn_stride_.size()]; + const float *prob_data = + reinterpret_cast(prob_tensor.Data()); + const float *bbox_data = + reinterpret_cast(bbox_tensor.Data()); + for (int idx = 0; idx < feature_h * feature_w; ++idx) { + // score and label + float score = 0.f; + int label = 0; + for (int j = 0; j < num_class_; ++j) { + if (prob_data[idx * num_class_ + j] > score) { + score = prob_data[idx * num_class_ + j]; + label = j; + } + } + // bbox + if (score > score_threshold_) { + int row = idx / feature_w; + int col = idx % feature_w; + std::vector bbox_pred(bbox_data + idx * 4 * reg_max_, + bbox_data + (idx + 1) * 4 * reg_max_); + bbox_results[label].boxes.push_back(DisPred2Bbox( + bbox_pred, col, row, fpn_stride_[i], in_w, in_h, reg_max_)); + bbox_results[label].scores.push_back(score); + bbox_results[label].label_ids.push_back(label); + } + } + } + + result->Clear(); + // nms for per class, i in [0~num_class-1] + for (int i = 0; i < bbox_results.size(); ++i) { + if (bbox_results[i].boxes.size() <= 0) { + continue; + } + vision::utils::NMS(&bbox_results[i], nms_threshold_); + // fill output results + for (int j = 0; j < bbox_results[i].boxes.size(); ++j) { + result->scores.push_back(bbox_results[i].scores[j]); + result->label_ids.push_back(bbox_results[i].label_ids[j]); + result->boxes.push_back({ + bbox_results[i].boxes[j][0] / scale_factor_w, 
+ bbox_results[i].boxes[j][1] / scale_factor_h, + bbox_results[i].boxes[j][2] / scale_factor_w, + bbox_results[i].boxes[j][3] / scale_factor_h, + }); + } + } + return true; +} + +std::array StructureV2LayoutPostprocessor::DisPred2Bbox( + const std::vector &bbox_pred, int x, int y, int stride, int resize_w, + int resize_h, int reg_max) { + float ct_x = (static_cast(x) + 0.5f) * static_cast(stride); + float ct_y = (static_cast(y) + 0.5f) * static_cast(stride); + std::vector dis_pred; + dis_pred.resize(4); + for (int i = 0; i < 4; i++) { + std::vector bbox_pred_i(bbox_pred.begin() + i * reg_max, + bbox_pred.begin() + (i + 1) * reg_max); + std::vector dis_after_sm = ocr::Softmax(bbox_pred_i); + float dis = 0.0f; + for (int j = 0; j < reg_max; j++) { + dis += static_cast(j) * dis_after_sm[j]; + } + dis *= static_cast(stride); + dis_pred[i] = dis; + } + + float xmin = std::max(ct_x - dis_pred[0], 0.0f); + float ymin = std::max(ct_y - dis_pred[1], 0.0f); + float xmax = std::min(ct_x + dis_pred[2], static_cast(resize_w)); + float ymax = std::min(ct_y + dis_pred[3], static_cast(resize_h)); + + return {xmin, ymin, xmax, ymax}; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h new file mode 100755 index 0000000000..421fbea3f0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_postprocessor.h @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +/*! @brief Postprocessor object for PaddleDet serials model. 
+ */ +class ULTRAINFER_DECL StructureV2LayoutPostprocessor { +public: + StructureV2LayoutPostprocessor() {} + /** \brief Process the result of runtime and fill to batch DetectionResult + * + * \param[in] tensors The inference result from runtime + * \param[in] results The output result of layout detection + * \param[in] batch_layout_img_info The image info of input images, + * {{image width, image height, resize width, resize height},...} + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results, + const std::vector> &batch_layout_img_info); + + /// Set score_threshold_ for layout detection postprocess, default is 0.4 + void SetScoreThreshold(float score_threshold) { + score_threshold_ = score_threshold; + } + /// Set nms_threshold_ for layout detection postprocess, default is 0.5 + void SetNMSThreshold(float nms_threshold) { nms_threshold_ = nms_threshold; } + /// Set num_class_ for layout detection postprocess, default is 5 + void SetNumClass(int num_class) { num_class_ = num_class; } + /// Set fpn_stride_ for layout detection postprocess, default is {8, 16, 32, + /// 64} + void SetFPNStride(const std::vector &fpn_stride) { + fpn_stride_ = fpn_stride; + } + /// Set reg_max_ for layout detection postprocess, default is 8 + void SetRegMax(int reg_max) { reg_max_ = reg_max; } // should private ? + /// Get score_threshold_ of layout detection postprocess, default is 0.4 + float GetScoreThreshold() const { return score_threshold_; } + /// Get nms_threshold_ of layout detection postprocess, default is 0.5 + float GetNMSThreshold() const { return nms_threshold_; } + /// Get num_class_ of layout detection postprocess, default is 5 + int GetNumClass() const { return num_class_; } + /// Get fpn_stride_ of layout detection postprocess, default is {8, 16, 32, + /// 64} + std::vector GetFPNStride() const { return fpn_stride_; } + /// Get reg_max_ of layout detection postprocess, default is 8 + int GetRegMax() const { return reg_max_; } + +private: + std::array DisPred2Bbox(const std::vector &bbox_pred, int x, + int y, int stride, int resize_w, + int resize_h, int reg_max); + bool + SingleBatchPostprocessor(const std::vector &single_batch_tensors, + const std::array &layout_img_info, + DetectionResult *result); + void SetSingleBatchExternalData(const std::vector &tensors, + std::vector &single_batch_tensors, + size_t batch_idx); + + std::vector fpn_stride_ = {8, 16, 32, 64}; + float score_threshold_ = 0.4; + float nms_threshold_ = 0.5; + int num_class_ = 5; + int reg_max_ = 8; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.cc new file mode 100755 index 0000000000..4aed3e1c89 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2LayoutPreprocessor::StructureV2LayoutPreprocessor() { + // default width(608) and height(900) + resize_op_ = + std::make_shared(layout_image_shape_[2], layout_image_shape_[1]); + normalize_permute_op_ = std::make_shared( + std::vector({0.485f, 0.456f, 0.406f}), + std::vector({0.229f, 0.224f, 0.225f}), true); +} + +std::array StructureV2LayoutPreprocessor::GetLayoutImgInfo(FDMat *img) { + if (static_shape_infer_) { + return {img->Width(), img->Height(), layout_image_shape_[2], + layout_image_shape_[1]}; + } else { + FDASSERT(false, "not support dynamic shape inference now!") + } + return {img->Width(), img->Height(), layout_image_shape_[2], + layout_image_shape_[1]}; +} + +bool StructureV2LayoutPreprocessor::ResizeLayoutImage(FDMat *img, int resize_w, + int resize_h) { + resize_op_->SetWidthAndHeight(resize_w, resize_h); + (*resize_op_)(img); + return true; +} + +bool StructureV2LayoutPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + batch_layout_img_info_.clear(); + batch_layout_img_info_.resize(image_batch->mats->size()); + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + batch_layout_img_info_[i] = GetLayoutImgInfo(mat); + ResizeLayoutImage(mat, batch_layout_img_info_[i][2], + batch_layout_img_info_[i][3]); + } + if (!disable_normalize_ && !disable_permute_) { + (*normalize_permute_op_)(image_batch); + } + + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h new file mode 100755 index 0000000000..1288f5cbe8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_layout_preprocessor.h @@ -0,0 +1,90 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Preprocessor object for DBDetector serials model. 
+ */ +class ULTRAINFER_DECL StructureV2LayoutPreprocessor : public ProcessorManager { +public: + StructureV2LayoutPreprocessor(); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Set preprocess normalize parameters, please call this API to customize + /// the normalize parameters, otherwise it will use the default normalize + /// parameters. + void SetNormalize(const std::vector &mean, + const std::vector &std, bool is_scale) { + normalize_permute_op_ = + std::make_shared(mean, std, is_scale); + } + + /// Get the image info of the last batch, return a list of array + /// {image width, image height, resize width, resize height} + const std::vector> *GetBatchLayoutImgInfo() { + return &batch_layout_img_info_; + } + + /// This function will disable normalize in preprocessing step. + void DisableNormalize() { disable_permute_ = true; } + /// This function will disable hwc2chw in preprocessing step. + void DisablePermute() { disable_normalize_ = true; } + /// Set image_shape for the detection preprocess. + /// This api is usually used when you retrain the model. + /// Generally, you do not need to use it. + void SetLayoutImageShape(const std::vector &image_shape) { + layout_image_shape_ = image_shape; + } + /// Get cls_image_shape for the classification preprocess + std::vector GetLayoutImageShape() const { return layout_image_shape_; } + /// Set static_shape_infer is true or not. When deploy PP-StructureV2 + /// on hardware which can not support dynamic input shape very well, + /// like Huawei Ascned, static_shape_infer needs to to be true. + void SetStaticShapeInfer(bool static_shape_infer) { + static_shape_infer_ = static_shape_infer; + } + /// Get static_shape_infer of the recognition preprocess + bool GetStaticShapeInfer() const { return static_shape_infer_; } + +private: + bool ResizeLayoutImage(FDMat *img, int resize_w, int resize_h); + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + std::vector> batch_layout_img_info_; + std::shared_ptr resize_op_; + std::shared_ptr normalize_permute_op_; + std::vector layout_image_shape_ = {3, 800, 608}; // c,h,w + // default true for pp-structurev2-layout model, backbone picodet. + bool static_shape_infer_ = true; + std::array GetLayoutImgInfo(FDMat *img); +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.cc new file mode 100755 index 0000000000..47c3109cb9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h" + +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2SERViLayoutXLMModel::StructureV2SERViLayoutXLMModel( + const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::PADDLE) { + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, + Backend::LITE}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + valid_timvx_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ipu_backends = {Backend::PDINFER}; + valid_directml_backends = {Backend::ORT}; + } else if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } else { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_rknpu_backends = {Backend::RKNPU2}; + valid_directml_backends = {Backend::ORT}; + valid_horizon_backends = {Backend::HORIZONNPU}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +std::unique_ptr +StructureV2SERViLayoutXLMModel::Clone() const { + std::unique_ptr clone_model = + utils::make_unique( + StructureV2SERViLayoutXLMModel(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool StructureV2SERViLayoutXLMModel::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h new file mode 100755 index 0000000000..480acd748b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
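Clone() above reuses the already-initialized runtime through CloneRuntime(), so a second handle to the same weights is much cheaper than constructing the model a second time. A minimal usage sketch, assuming a local ser_vi_layoutxlm export; the paths and the Initialized() check (mirroring the `initialized` flag set in the constructor) are illustrative:

    #include "ultrainfer/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h"

    int main() {
      ultrainfer::vision::ocr::StructureV2SERViLayoutXLMModel model(
          "ser_vi_layoutxlm/model.pdmodel", "ser_vi_layoutxlm/model.pdiparams",
          "ser_vi_layoutxlm/infer_cfg.yml");
      if (!model.Initialized()) return -1;  // assumed accessor for the `initialized` flag
      auto worker = model.Clone();          // second instance sharing the loaded runtime
      return 0;
    }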
+ +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { +/** \brief All classification model APIs are defined inside this namespace + * + */ +namespace ocr { +/*! @brief StructureV2SERViLayoutXLM model object used when to load a + * StructureV2SERViLayoutXLM model exported by StructureV2SERViLayoutXLMModel + * repository + */ +class ULTRAINFER_DECL StructureV2SERViLayoutXLMModel : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ser_vi_layoutxlm/model.pdmodel \param[in] params_file Path of parameter + * file, e.g ser_vi_layoutxlm/model.pdiparams, if the model format is ONNX, + * this parameter will be ignored \param[in] config_file Path of configuration + * file for deployment, e.g ser_vi_layoutxlm/infer_cfg.yml \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in `valid_cpu_backends` \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + StructureV2SERViLayoutXLMModel( + const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new StructureV2SERViLayoutXLMModel with less memory usage + * when multiple instances of the same model are created + * + * \return new StructureV2SERViLayoutXLMModel* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + virtual std::string ModelName() const { + return "StructureV2SERViLayoutXLMModel"; + } + +protected: + bool Initialize(); +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.cc new file mode 100755 index 0000000000..f2e4cf0e60 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.cc @@ -0,0 +1,134 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
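The constructor above only fills the candidate backend lists (`valid_cpu_backends`, `valid_gpu_backends`, ...); the device and backend actually used come from the RuntimeOption supplied by the caller. A hedged sketch of steering that choice, assuming RuntimeOption keeps the usual UseGpu()/UseOrtBackend() helpers:

    ultrainfer::RuntimeOption option;
    option.UseGpu(0);        // assumed helper: run on GPU 0
    option.UseOrtBackend();  // assumed helper: prefer ONNX Runtime from valid_gpu_backends
    ultrainfer::vision::ocr::StructureV2SERViLayoutXLMModel model(
        "ser_vi_layoutxlm/model.pdmodel", "ser_vi_layoutxlm/model.pdiparams",
        "ser_vi_layoutxlm/infer_cfg.yml", option);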
+ +#include "ultrainfer/vision/ocr/ppocr/structurev2_table.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2Table::StructureV2Table() {} +StructureV2Table::StructureV2Table(const std::string &model_file, + const std::string ¶ms_file, + const std::string &table_char_dict_path, + const std::string &box_shape, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : postprocessor_(table_char_dict_path, box_shape) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +// Init +bool StructureV2Table::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +std::unique_ptr StructureV2Table::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(StructureV2Table(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool StructureV2Table::Predict(const cv::Mat &img, + std::vector> *boxes_result, + std::vector *structure_result) { + std::vector>> det_results; + std::vector> structure_results; + if (!BatchPredict({img}, &det_results, &structure_results)) { + return false; + } + *boxes_result = std::move(det_results[0]); + *structure_result = std::move(structure_results[0]); + return true; +} + +bool StructureV2Table::Predict(const cv::Mat &img, + vision::OCRResult *ocr_result) { + if (!Predict(img, &(ocr_result->table_boxes), + &(ocr_result->table_structure))) { + return false; + } + return true; +} + +bool StructureV2Table::BatchPredict( + const std::vector &images, + std::vector *ocr_results) { + std::vector>> det_results; + std::vector> structure_results; + if (!BatchPredict(images, &det_results, &structure_results)) { + return false; + } + ocr_results->resize(det_results.size()); + for (int i = 0; i < det_results.size(); i++) { + (*ocr_results)[i].table_boxes = std::move(det_results[i]); + (*ocr_results)[i].table_structure = std::move(structure_results[i]); + } + return true; +} + +bool StructureV2Table::BatchPredict( + const std::vector &images, + std::vector>> *det_results, + std::vector> *structure_results) { + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + auto batch_det_img_info = preprocessor_.GetBatchImgInfo(); + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, det_results, + structure_results, *batch_det_img_info)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." 
+ << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.h new file mode 100755 index 0000000000..152d6cc0f6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table.h @@ -0,0 +1,126 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { + +/*! @brief DBDetector object is used to load the detection model provided by + * PaddleOCR. + */ +class ULTRAINFER_DECL StructureV2Table : public UltraInferModel { +public: + StructureV2Table(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdmodel. \param[in] + * params_file Path of parameter file, e.g + * ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends`. \param[in] model_format Model + * format of the loaded model, default is Paddle format. \param[in] box_shape + * Type of output box, default is ori. + */ + StructureV2Table(const std::string &model_file, + const std::string ¶ms_file = "", + const std::string &table_char_dict_path = "", + const std::string &box_shape = "ori", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new StructureV2Table Recognizer with less memory usage when + * multiple instances of the same model are created + * + * \return new StructureV2Table* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "ppocr/ocr_table"; } + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] boxes_result The output of + * OCR detection model result will be writen to this structure. \return true + * if the prediction is successed, otherwise false. 
+ */ + virtual bool Predict(const cv::Mat &img, + std::vector> *boxes_result, + std::vector *structure_result); + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] ocr_result The output of OCR + * detection model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat &img, vision::OCRResult *ocr_result); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] det_results The output + * of OCR detection model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool + BatchPredict(const std::vector &images, + std::vector>> *det_results, + std::vector> *structure_results); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] ocr_results The output + * of OCR detection model result will be writen to this structure. \return + * true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *ocr_results); + + /// Get preprocessor reference of StructureV2TablePreprocessor + virtual StructureV2TablePreprocessor &GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference of StructureV2TablePostprocessor + virtual StructureV2TablePostprocessor &GetPostprocessor() { + return postprocessor_; + } + +private: + bool Initialize(); + StructureV2TablePreprocessor preprocessor_; + StructureV2TablePostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.cc new file mode 100755 index 0000000000..36498215b1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
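For orientation, the OCRResult overload declared above is the simplest entry point: it fills table_boxes and table_structure in one call (see the BatchPredict implementation earlier in this patch). A minimal sketch, assuming the en_ppstructure_mobile_v2.0_SLANet export named in the doc comment and an illustrative dictionary path:

    #include <opencv2/opencv.hpp>
    #include "ultrainfer/vision/ocr/ppocr/structurev2_table.h"

    int main() {
      ultrainfer::vision::ocr::StructureV2Table table(
          "en_ppstructure_mobile_v2.0_SLANet_infer/model.pdmodel",
          "en_ppstructure_mobile_v2.0_SLANet_infer/model.pdiparams",
          "table_structure_dict.txt");  // table_char_dict_path (illustrative)
      cv::Mat img = cv::imread("table.jpg");
      ultrainfer::vision::OCRResult result;
      if (!table.Predict(img, &result)) return -1;
      // result.table_structure: predicted structure tokens
      // result.table_boxes: matching cell boxes
      return 0;
    }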
+ +#include "ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h" + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2TablePostprocessor::StructureV2TablePostprocessor() { + initialized_ = false; +} + +StructureV2TablePostprocessor::StructureV2TablePostprocessor( + const std::string &dict_path, const std::string &box_shape) + : box_shape(box_shape) { + std::ifstream in(dict_path); + + FDASSERT(in, "Cannot open file %s to read.", dict_path.c_str()); + std::string line; + dict_character.clear(); + dict_character.push_back("sos"); // add special character + while (getline(in, line)) { + dict_character.push_back(line); + } + + if (merge_no_span_structure) { + if (std::find(dict_character.begin(), dict_character.end(), "") == + dict_character.end()) { + dict_character.push_back(""); + } + for (auto it = dict_character.begin(); it != dict_character.end();) { + if (*it == "") { + it = dict_character.erase(it); + } else { + ++it; + } + } + } + + dict_character.push_back("eos"); // add special character + dict.clear(); + for (size_t i = 0; i < dict_character.size(); i++) { + dict[dict_character[i]] = int(i); + if (dict_character[i] == "beg") { + ignore_beg_token_idx = i; + } else if (dict_character[i] == "end") { + ignore_end_token_idx = i; + } + } + dict_end_idx = dict_character.size() - 1; + + initialized_ = true; +} + +bool StructureV2TablePostprocessor::SingleBatchPostprocessor( + const float *structure_probs, const float *bbox_preds, size_t slice_dim, + size_t prob_dim, size_t box_dim, int img_width, int img_height, + float ratio_h, float ratio_w, int pad_h, int pad_w, + std::vector> *boxes_result, + std::vector *structure_list_result) { + structure_list_result->push_back(""); + structure_list_result->push_back(""); + structure_list_result->push_back(""); + + for (int i = 0; i < slice_dim; i++) { + int structure_idx = 0; + float structure_prob = structure_probs[i * prob_dim]; + for (int j = 0; j < prob_dim; j++) { + if (structure_probs[i * prob_dim + j] > structure_prob) { + structure_prob = structure_probs[i * prob_dim + j]; + structure_idx = j; + } + } + + if (structure_idx > 0 && structure_idx == dict_end_idx) + break; + + if (structure_idx == ignore_end_token_idx || + structure_idx == ignore_beg_token_idx) + continue; + + std::string text = dict_character[structure_idx]; + if (std::find(td_tokens.begin(), td_tokens.end(), text) != + td_tokens.end()) { + std::array bbox; + // box dim: en->4, ch->8 + + if (box_dim == 4) { + bbox[0] = bbox_preds[i * box_dim] * img_width; + bbox[1] = bbox_preds[i * box_dim + 1] * img_height; + + bbox[2] = bbox_preds[i * box_dim + 2] * img_width; + bbox[3] = bbox_preds[i * box_dim + 1] * img_height; + + bbox[4] = bbox_preds[i * box_dim + 2] * img_width; + bbox[5] = bbox_preds[i * box_dim + 3] * img_height; + + bbox[6] = bbox_preds[i * box_dim] * img_width; + bbox[7] = bbox_preds[i * box_dim + 3] * img_height; + } else { + for (int k = 0; k < 8; k++) { + float bbox_pred = bbox_preds[i * box_dim + k]; + if (box_shape == "pad") { + bbox[k] = int(k % 2 == 0 ? bbox_pred * pad_w / ratio_w + : bbox_pred * pad_h / ratio_h); + } else { + bbox[k] = int(k % 2 == 0 ? bbox_pred * img_width + : bbox_pred * img_height); + } + } + } + + boxes_result->push_back(bbox); + } + structure_list_result->push_back(text); + } + structure_list_result->push_back("
"); + structure_list_result->push_back(""); + structure_list_result->push_back(""); + + return true; +} + +bool StructureV2TablePostprocessor::Run( + const std::vector &tensors, + std::vector>> *bbox_batch_list, + std::vector> *structure_batch_list, + const std::vector> &batch_det_img_info) { + // Table have 2 output tensors. + const FDTensor &structure_probs = tensors[1]; + const FDTensor &bbox_preds = tensors[0]; + + const float *structure_probs_data = + reinterpret_cast(structure_probs.Data()); + size_t structure_probs_length = + accumulate(structure_probs.shape.begin() + 1, structure_probs.shape.end(), + 1, std::multiplies()); + + const float *bbox_preds_data = + reinterpret_cast(bbox_preds.Data()); + size_t bbox_preds_length = + accumulate(bbox_preds.shape.begin() + 1, bbox_preds.shape.end(), 1, + std::multiplies()); + size_t batch = bbox_preds.shape[0]; + size_t slice_dim = bbox_preds.shape[1]; + size_t prob_dim = structure_probs.shape[2]; + size_t box_dim = bbox_preds.shape[2]; + + bbox_batch_list->resize(batch); + structure_batch_list->resize(batch); + + for (int i_batch = 0; i_batch < batch; ++i_batch) { + SingleBatchPostprocessor( + structure_probs_data, bbox_preds_data, slice_dim, prob_dim, box_dim, + batch_det_img_info[i_batch][0], batch_det_img_info[i_batch][1], + batch_det_img_info[i_batch][2], batch_det_img_info[i_batch][3], + batch_det_img_info[i_batch][4], batch_det_img_info[i_batch][5], + &bbox_batch_list->at(i_batch), &structure_batch_list->at(i_batch)); + structure_probs_data = structure_probs_data + structure_probs_length; + bbox_preds_data = bbox_preds_data + bbox_preds_length; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h new file mode 100755 index 0000000000..97b91f0545 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_postprocessor.h @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Postprocessor object for DBDetector serials model. + */ + +class ULTRAINFER_DECL StructureV2TablePostprocessor { +public: + StructureV2TablePostprocessor(); + /** \brief Create a postprocessor instance for Recognizer serials model + * + * \param[in] label_path The path of label_dict + * \param[in] box_shape Type of output box, default is ori. 
+ */ + + explicit StructureV2TablePostprocessor(const std::string &dict_path, + const std::string &box_shape); + + /** \brief Process the result of runtime and fill to RecognizerResult + * + * \param[in] tensors The inference result from runtime + * \param[in] texts The output text results of recognizer + * \param[in] rec_scores The output score results of recognizer + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector>> *bbox_batch_list, + std::vector> *structure_batch_list, + const std::vector> &batch_det_img_info_); + +private: + PostProcessor util_post_processor_; + bool SingleBatchPostprocessor( + const float *structure_probs, const float *bbox_preds, size_t slice_dim, + size_t prob_dim, size_t box_dim, int img_width, int img_height, + float ratio_h, float ratio_w, int pad_h, int pad_w, + std::vector> *boxes_result, + std::vector *structure_list_result); + + bool merge_no_span_structure{true}; + std::vector dict_character; + std::string box_shape; + std::vector td_tokens{"", ""}; + std::map dict; + int ignore_beg_token_idx; + int ignore_end_token_idx; + int dict_end_idx; + bool initialized_ = false; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.cc new file mode 100755 index 0000000000..64cce7b452 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +StructureV2TablePreprocessor::StructureV2TablePreprocessor() { + resize_op_ = std::make_shared(-1, -1); + + std::vector value = {0, 0, 0}; + pad_op_ = std::make_shared(0, 0, 0, 0, value); + + std::vector mean = {0.485f, 0.456f, 0.406f}; + std::vector std = {0.229f, 0.224f, 0.225f}; + normalize_op_ = std::make_shared(mean, std, true); + hwc2chw_op_ = std::make_shared(); +} + +void StructureV2TablePreprocessor::StructureV2TableResizeImage(FDMat *mat, + int batch_idx) { + float img_h = float(rec_image_shape_[1]); + float img_w = float(rec_image_shape_[2]); + float width = float(mat->Width()); + float height = float(mat->Height()); + float ratio = float(float(max_len) / (std::max(height, width) * 1.0)); + int resize_h = int(height * ratio); + int resize_w = int(width * ratio); + + resize_op_->SetWidthAndHeight(resize_w, resize_h); + (*resize_op_)(mat); + + (*normalize_op_)(mat); + pad_op_->SetPaddingSize(0, int(max_len - resize_h), 0, + int(max_len - resize_w)); + (*pad_op_)(mat); + + (*hwc2chw_op_)(mat); + + batch_det_img_info_[batch_idx] = {int(width), int(height), float(ratio), + float(ratio), int(max_len), int(max_len)}; +} + +bool StructureV2TablePreprocessor::Run(std::vector *images, + std::vector *outputs, + size_t start_index, size_t end_index, + const std::vector &indices) { + if (images->size() == 0 || end_index <= start_index || + end_index > images->size()) { + FDERROR << "images->size() or index error. Correct is: 0 <= start_index < " + "end_index <= images->size()" + << std::endl; + return false; + } + + std::vector mats(end_index - start_index); + for (size_t i = start_index; i < end_index; ++i) { + size_t real_index = i; + if (indices.size() != 0) { + real_index = indices[i]; + } + mats[i - start_index] = images->at(real_index); + } + return Run(&mats, outputs); +} + +bool StructureV2TablePreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + batch_det_img_info_.clear(); + batch_det_img_info_.resize(image_batch->mats->size()); + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + StructureV2TableResizeImage(mat, i); + } + + // Only have 1 output Tensor. + outputs->resize(1); + // Get the NCHW tensor + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h new file mode 100755 index 0000000000..3d86933516 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/structurev2_table_preprocessor.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Preprocessor object for table model. + */ +class ULTRAINFER_DECL StructureV2TablePreprocessor : public ProcessorManager { +public: + StructureV2TablePreprocessor(); + using ProcessorManager::Run; + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input data list, all the elements are FDMat + * \param[in] outputs The output tensors which will be fed into runtime + * \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector *images, std::vector *outputs, + size_t start_index, size_t end_index, + const std::vector &indices); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Get the image info of the last batch, return a list of array + /// {image width, image height, resize width, resize height} + const std::vector> *GetBatchImgInfo() { + return &batch_det_img_info_; + } + +private: + void StructureV2TableResizeImage(FDMat *mat, int batch_idx); + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + int max_len = 488; + std::vector rec_image_shape_ = {3, max_len, max_len}; + bool static_shape_infer_ = false; + std::shared_ptr resize_op_; + std::shared_ptr pad_op_; + std::shared_ptr normalize_op_; + std::shared_ptr hwc2chw_op_; + std::vector> batch_det_img_info_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.cc new file mode 100755 index 0000000000..14ede4120b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.cc @@ -0,0 +1,4374 @@ +/******************************************************************************* + * * + * Author : Angus Johnson * Version : 6.4.2 * Date : 27 February + *2017 * Website : + *http://www.angusj.com * Copyright : + *Angus Johnson 2010-2017 * + * * + * License: * Use, modification & distribution is subject to Boost Software + *License Ver 1. * http://www.boost.org/LICENSE_1_0.txt * + * * + * Attributions: * The code in this library is an extension of Bala Vatti's + *clipping algorithm: * "A generic solution to polygon clipping" * + * Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. 
* + * http://portal.acm.org/citation.cfm?id=129906 * + * * + * Computer graphics and geometric modeling: implementation and algorithms * By + *Max K. Agoston * + * Springer; 1 edition (January 4, 2005) * + * http://books.google.com/books?q=vatti+clipping+agoston * + * * + * See also: * "Polygon Offsetting by Computing Winding Numbers" * Paper no. + *DETC2005-85513 pp. 565-575 * ASME 2005 + *International Design Engineering Technical Conferences * and + *Computers and Information in Engineering Conference (IDETC/CIE2005) * + * September 24-28, 2005 , Long Beach, California, USA * + * http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf * + * * + *******************************************************************************/ + +/******************************************************************************* + * * + * This is a translation of the Delphi Clipper library and the naming style * + * used has retained a Delphi flavour. * + * * + *******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "clipper.h" + +namespace ClipperLib { + +static double const pi = 3.141592653589793238; +static double const two_pi = pi * 2; +static double const def_arc_tolerance = 0.25; + +enum Direction { dRightToLeft, dLeftToRight }; + +static int const Unassigned = -1; // edge not currently 'owning' a solution +static int const Skip = -2; // edge that would otherwise close a path + +#define HORIZONTAL (-1.0E+40) +#define TOLERANCE (1.0e-20) +#define NEAR_ZERO(val) (((val) > -TOLERANCE) && ((val) < TOLERANCE)) + +struct TEdge { + IntPoint Bot; + IntPoint Curr; // current (updated for every new scanbeam) + IntPoint Top; + double Dx; + PolyType PolyTyp; + EdgeSide Side; // side only refers to current side of solution poly + int WindDelta; // 1 or -1 depending on winding direction + int WindCnt; + int WindCnt2; // winding count of the opposite polytype + int OutIdx; + TEdge *Next; + TEdge *Prev; + TEdge *NextInLML; + TEdge *NextInAEL; + TEdge *PrevInAEL; + TEdge *NextInSEL; + TEdge *PrevInSEL; +}; + +struct IntersectNode { + TEdge *Edge1; + TEdge *Edge2; + IntPoint Pt; +}; + +struct LocalMinimum { + cInt Y; + TEdge *LeftBound; + TEdge *RightBound; +}; + +struct OutPt; + +// OutRec: contains a path in the clipping solution. Edges in the AEL will +// carry a pointer to an OutRec when they are part of the clipping solution. +struct OutRec { + int Idx; + bool IsHole; + bool IsOpen; + OutRec *FirstLeft; // see comments in clipper.pas + PolyNode *PolyNd; + OutPt *Pts; + OutPt *BottomPt; +}; + +struct OutPt { + int Idx; + IntPoint Pt; + OutPt *Next; + OutPt *Prev; +}; + +struct Join { + OutPt *OutPt1; + OutPt *OutPt2; + IntPoint OffPt; +}; + +struct LocMinSorter { + inline bool operator()(const LocalMinimum &locMin1, + const LocalMinimum &locMin2) { + return locMin2.Y < locMin1.Y; + } +}; + +//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ + +inline cInt Round(double val) { + if ((val < 0)) + return static_cast(val - 0.5); + else + return static_cast(val + 0.5); +} +//------------------------------------------------------------------------------ + +inline cInt Abs(cInt val) { return val < 0 ? -val : val; } + +//------------------------------------------------------------------------------ +// PolyTree methods ... 
+//------------------------------------------------------------------------------ + +void PolyTree::Clear() { + for (PolyNodes::size_type i = 0; i < AllNodes.size(); ++i) + delete AllNodes[i]; + AllNodes.resize(0); + Childs.resize(0); +} +//------------------------------------------------------------------------------ + +PolyNode *PolyTree::GetFirst() const { + if (!Childs.empty()) + return Childs[0]; + else + return 0; +} +//------------------------------------------------------------------------------ + +int PolyTree::Total() const { + int result = (int)AllNodes.size(); + // with negative offsets, ignore the hidden outer polygon ... + if (result > 0 && Childs[0] != AllNodes[0]) + result--; + return result; +} + +//------------------------------------------------------------------------------ +// PolyNode methods ... +//------------------------------------------------------------------------------ + +PolyNode::PolyNode() : Parent(0), Index(0), m_IsOpen(false) {} +//------------------------------------------------------------------------------ + +int PolyNode::ChildCount() const { return (int)Childs.size(); } +//------------------------------------------------------------------------------ + +void PolyNode::AddChild(PolyNode &child) { + unsigned cnt = (unsigned)Childs.size(); + Childs.push_back(&child); + child.Parent = this; + child.Index = cnt; +} +//------------------------------------------------------------------------------ + +PolyNode *PolyNode::GetNext() const { + if (!Childs.empty()) + return Childs[0]; + else + return GetNextSiblingUp(); +} +//------------------------------------------------------------------------------ + +PolyNode *PolyNode::GetNextSiblingUp() const { + if (!Parent) // protects against PolyTree.GetNextSiblingUp() + return 0; + else if (Index == Parent->Childs.size() - 1) + return Parent->GetNextSiblingUp(); + else + return Parent->Childs[Index + 1]; +} +//------------------------------------------------------------------------------ + +bool PolyNode::IsHole() const { + bool result = true; + PolyNode *node = Parent; + while (node) { + result = !result; + node = node->Parent; + } + return result; +} +//------------------------------------------------------------------------------ + +bool PolyNode::IsOpen() const { return m_IsOpen; } +//------------------------------------------------------------------------------ + +#ifndef use_int32 + +//------------------------------------------------------------------------------ +// Int128 class (enables safe math on signed 64bit integers) +// eg Int128 val1((long64)9223372036854775807); //ie 2^63 -1 +// Int128 val2((long64)9223372036854775807); +// Int128 val3 = val1 * val2; +// val3.AsString => "85070591730234615847396907784232501249" (8.5e+37) +//------------------------------------------------------------------------------ + +class Int128 { +public: + ulong64 lo; + long64 hi; + + Int128(long64 _lo = 0) { + lo = (ulong64)_lo; + if (_lo < 0) + hi = -1; + else + hi = 0; + } + + Int128(const Int128 &val) : lo(val.lo), hi(val.hi) {} + + Int128(const long64 &_hi, const ulong64 &_lo) : lo(_lo), hi(_hi) {} + + Int128 &operator=(const long64 &val) { + lo = (ulong64)val; + if (val < 0) + hi = -1; + else + hi = 0; + return *this; + } + + bool operator==(const Int128 &val) const { + return (hi == val.hi && lo == val.lo); + } + + bool operator!=(const Int128 &val) const { return !(*this == val); } + + bool operator>(const Int128 &val) const { + if (hi != val.hi) + return hi > val.hi; + else + return lo > val.lo; + } + + bool 
operator<(const Int128 &val) const { + if (hi != val.hi) + return hi < val.hi; + else + return lo < val.lo; + } + + bool operator>=(const Int128 &val) const { return !(*this < val); } + + bool operator<=(const Int128 &val) const { return !(*this > val); } + + Int128 &operator+=(const Int128 &rhs) { + hi += rhs.hi; + lo += rhs.lo; + if (lo < rhs.lo) + hi++; + return *this; + } + + Int128 operator+(const Int128 &rhs) const { + Int128 result(*this); + result += rhs; + return result; + } + + Int128 &operator-=(const Int128 &rhs) { + *this += -rhs; + return *this; + } + + Int128 operator-(const Int128 &rhs) const { + Int128 result(*this); + result -= rhs; + return result; + } + + Int128 operator-() const // unary negation + { + if (lo == 0) + return Int128(-hi, 0); + else + return Int128(~hi, ~lo + 1); + } + + operator double() const { + const double shift64 = 18446744073709551616.0; // 2^64 + if (hi < 0) { + if (lo == 0) + return (double)hi * shift64; + else + return -(double)(~lo + ~hi * shift64); + } else + return (double)(lo + hi * shift64); + } +}; +//------------------------------------------------------------------------------ + +Int128 Int128Mul(long64 lhs, long64 rhs) { + bool negate = (lhs < 0) != (rhs < 0); + + if (lhs < 0) + lhs = -lhs; + ulong64 int1Hi = ulong64(lhs) >> 32; + ulong64 int1Lo = ulong64(lhs & 0xFFFFFFFF); + + if (rhs < 0) + rhs = -rhs; + ulong64 int2Hi = ulong64(rhs) >> 32; + ulong64 int2Lo = ulong64(rhs & 0xFFFFFFFF); + + // nb: see comments in clipper.pas + ulong64 a = int1Hi * int2Hi; + ulong64 b = int1Lo * int2Lo; + ulong64 c = int1Hi * int2Lo + int1Lo * int2Hi; + + Int128 tmp; + tmp.hi = long64(a + (c >> 32)); + tmp.lo = long64(c << 32); + tmp.lo += long64(b); + if (tmp.lo < b) + tmp.hi++; + if (negate) + tmp = -tmp; + return tmp; +}; +#endif + +//------------------------------------------------------------------------------ +// Miscellaneous global functions +//------------------------------------------------------------------------------ + +bool Orientation(const Path &poly) { return Area(poly) >= 0; } +//------------------------------------------------------------------------------ + +double Area(const Path &poly) { + int size = (int)poly.size(); + if (size < 3) + return 0; + + double a = 0; + for (int i = 0, j = size - 1; i < size; ++i) { + a += ((double)poly[j].X + poly[i].X) * ((double)poly[j].Y - poly[i].Y); + j = i; + } + return -a * 0.5; +} +//------------------------------------------------------------------------------ + +double Area(const OutPt *op) { + const OutPt *startOp = op; + if (!op) + return 0; + double a = 0; + do { + a += (double)(op->Prev->Pt.X + op->Pt.X) * + (double)(op->Prev->Pt.Y - op->Pt.Y); + op = op->Next; + } while (op != startOp); + return a * 0.5; +} +//------------------------------------------------------------------------------ + +double Area(const OutRec &outRec) { return Area(outRec.Pts); } +//------------------------------------------------------------------------------ + +bool PointIsVertex(const IntPoint &Pt, OutPt *pp) { + OutPt *pp2 = pp; + do { + if (pp2->Pt == Pt) + return true; + pp2 = pp2->Next; + } while (pp2 != pp); + return false; +} +//------------------------------------------------------------------------------ + +// See "The Point in Polygon Problem for Arbitrary Polygons" by Hormann & +// Agathos +// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.88.5498&rep=rep1&type=pdf +int PointInPolygon(const IntPoint &pt, const Path &path) { + // returns 0 if false, +1 if true, -1 if pt ON polygon boundary + 
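  // Same even-odd crossing test as the Path overload above (Hormann & Agathos);
  // this version walks the OutPt linked list instead of indexing a Path and
  // stops once the traversal wraps back to the starting OutPt.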
int result = 0; + size_t cnt = path.size(); + if (cnt < 3) + return 0; + IntPoint ip = path[0]; + for (size_t i = 1; i <= cnt; ++i) { + IntPoint ipNext = (i == cnt ? path[0] : path[i]); + if (ipNext.Y == pt.Y) { + if ((ipNext.X == pt.X) || + (ip.Y == pt.Y && ((ipNext.X > pt.X) == (ip.X < pt.X)))) + return -1; + } + if ((ip.Y < pt.Y) != (ipNext.Y < pt.Y)) { + if (ip.X >= pt.X) { + if (ipNext.X > pt.X) + result = 1 - result; + else { + double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - + (double)(ipNext.X - pt.X) * (ip.Y - pt.Y); + if (!d) + return -1; + if ((d > 0) == (ipNext.Y > ip.Y)) + result = 1 - result; + } + } else { + if (ipNext.X > pt.X) { + double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - + (double)(ipNext.X - pt.X) * (ip.Y - pt.Y); + if (!d) + return -1; + if ((d > 0) == (ipNext.Y > ip.Y)) + result = 1 - result; + } + } + } + ip = ipNext; + } + return result; +} +//------------------------------------------------------------------------------ + +int PointInPolygon(const IntPoint &pt, OutPt *op) { + // returns 0 if false, +1 if true, -1 if pt ON polygon boundary + int result = 0; + OutPt *startOp = op; + for (;;) { + if (op->Next->Pt.Y == pt.Y) { + if ((op->Next->Pt.X == pt.X) || + (op->Pt.Y == pt.Y && ((op->Next->Pt.X > pt.X) == (op->Pt.X < pt.X)))) + return -1; + } + if ((op->Pt.Y < pt.Y) != (op->Next->Pt.Y < pt.Y)) { + if (op->Pt.X >= pt.X) { + if (op->Next->Pt.X > pt.X) + result = 1 - result; + else { + double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - + (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y); + if (!d) + return -1; + if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) + result = 1 - result; + } + } else { + if (op->Next->Pt.X > pt.X) { + double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - + (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y); + if (!d) + return -1; + if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) + result = 1 - result; + } + } + } + op = op->Next; + if (startOp == op) + break; + } + return result; +} +//------------------------------------------------------------------------------ + +bool Poly2ContainsPoly1(OutPt *OutPt1, OutPt *OutPt2) { + OutPt *op = OutPt1; + do { + // nb: PointInPolygon returns 0 if false, +1 if true, -1 if pt on polygon + int res = PointInPolygon(op->Pt, OutPt2); + if (res >= 0) + return res > 0; + op = op->Next; + } while (op != OutPt1); + return true; +} +//---------------------------------------------------------------------- + +bool SlopesEqual(const TEdge &e1, const TEdge &e2, bool UseFullInt64Range) { +#ifndef use_int32 + if (UseFullInt64Range) + return Int128Mul(e1.Top.Y - e1.Bot.Y, e2.Top.X - e2.Bot.X) == + Int128Mul(e1.Top.X - e1.Bot.X, e2.Top.Y - e2.Bot.Y); + else +#endif + return (e1.Top.Y - e1.Bot.Y) * (e2.Top.X - e2.Bot.X) == + (e1.Top.X - e1.Bot.X) * (e2.Top.Y - e2.Bot.Y); +} +//------------------------------------------------------------------------------ + +bool SlopesEqual(const IntPoint pt1, const IntPoint pt2, const IntPoint pt3, + bool UseFullInt64Range) { +#ifndef use_int32 + if (UseFullInt64Range) + return Int128Mul(pt1.Y - pt2.Y, pt2.X - pt3.X) == + Int128Mul(pt1.X - pt2.X, pt2.Y - pt3.Y); + else +#endif + return (pt1.Y - pt2.Y) * (pt2.X - pt3.X) == + (pt1.X - pt2.X) * (pt2.Y - pt3.Y); +} +//------------------------------------------------------------------------------ + +bool SlopesEqual(const IntPoint pt1, const IntPoint pt2, const IntPoint pt3, + const IntPoint pt4, bool UseFullInt64Range) { +#ifndef use_int32 + if (UseFullInt64Range) + return Int128Mul(pt1.Y - pt2.Y, 
pt3.X - pt4.X) == + Int128Mul(pt1.X - pt2.X, pt3.Y - pt4.Y); + else +#endif + return (pt1.Y - pt2.Y) * (pt3.X - pt4.X) == + (pt1.X - pt2.X) * (pt3.Y - pt4.Y); +} +//------------------------------------------------------------------------------ + +inline bool IsHorizontal(TEdge &e) { return e.Dx == HORIZONTAL; } +//------------------------------------------------------------------------------ + +inline double GetDx(const IntPoint pt1, const IntPoint pt2) { + return (pt1.Y == pt2.Y) ? HORIZONTAL + : (double)(pt2.X - pt1.X) / (pt2.Y - pt1.Y); +} +//--------------------------------------------------------------------------- + +inline void SetDx(TEdge &e) { + cInt dy = (e.Top.Y - e.Bot.Y); + if (dy == 0) + e.Dx = HORIZONTAL; + else + e.Dx = (double)(e.Top.X - e.Bot.X) / dy; +} +//--------------------------------------------------------------------------- + +inline void SwapSides(TEdge &Edge1, TEdge &Edge2) { + EdgeSide Side = Edge1.Side; + Edge1.Side = Edge2.Side; + Edge2.Side = Side; +} +//------------------------------------------------------------------------------ + +inline void SwapPolyIndexes(TEdge &Edge1, TEdge &Edge2) { + int OutIdx = Edge1.OutIdx; + Edge1.OutIdx = Edge2.OutIdx; + Edge2.OutIdx = OutIdx; +} +//------------------------------------------------------------------------------ + +inline cInt TopX(TEdge &edge, const cInt currentY) { + return (currentY == edge.Top.Y) + ? edge.Top.X + : edge.Bot.X + Round(edge.Dx * (currentY - edge.Bot.Y)); +} +//------------------------------------------------------------------------------ + +void IntersectPoint(TEdge &Edge1, TEdge &Edge2, IntPoint &ip) { +#ifdef use_xyz + ip.Z = 0; +#endif + + double b1, b2; + if (Edge1.Dx == Edge2.Dx) { + ip.Y = Edge1.Curr.Y; + ip.X = TopX(Edge1, ip.Y); + return; + } else if (Edge1.Dx == 0) { + ip.X = Edge1.Bot.X; + if (IsHorizontal(Edge2)) + ip.Y = Edge2.Bot.Y; + else { + b2 = Edge2.Bot.Y - (Edge2.Bot.X / Edge2.Dx); + ip.Y = Round(ip.X / Edge2.Dx + b2); + } + } else if (Edge2.Dx == 0) { + ip.X = Edge2.Bot.X; + if (IsHorizontal(Edge1)) + ip.Y = Edge1.Bot.Y; + else { + b1 = Edge1.Bot.Y - (Edge1.Bot.X / Edge1.Dx); + ip.Y = Round(ip.X / Edge1.Dx + b1); + } + } else { + b1 = Edge1.Bot.X - Edge1.Bot.Y * Edge1.Dx; + b2 = Edge2.Bot.X - Edge2.Bot.Y * Edge2.Dx; + double q = (b2 - b1) / (Edge1.Dx - Edge2.Dx); + ip.Y = Round(q); + if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx)) + ip.X = Round(Edge1.Dx * q + b1); + else + ip.X = Round(Edge2.Dx * q + b2); + } + + if (ip.Y < Edge1.Top.Y || ip.Y < Edge2.Top.Y) { + if (Edge1.Top.Y > Edge2.Top.Y) + ip.Y = Edge1.Top.Y; + else + ip.Y = Edge2.Top.Y; + if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx)) + ip.X = TopX(Edge1, ip.Y); + else + ip.X = TopX(Edge2, ip.Y); + } + // finally, don't allow 'ip' to be BELOW curr.Y (ie bottom of scanbeam) ... + if (ip.Y > Edge1.Curr.Y) { + ip.Y = Edge1.Curr.Y; + // use the more vertical edge to derive X ... 
+ if (std::fabs(Edge1.Dx) > std::fabs(Edge2.Dx)) + ip.X = TopX(Edge2, ip.Y); + else + ip.X = TopX(Edge1, ip.Y); + } +} +//------------------------------------------------------------------------------ + +void ReversePolyPtLinks(OutPt *pp) { + if (!pp) + return; + OutPt *pp1, *pp2; + pp1 = pp; + do { + pp2 = pp1->Next; + pp1->Next = pp1->Prev; + pp1->Prev = pp2; + pp1 = pp2; + } while (pp1 != pp); +} +//------------------------------------------------------------------------------ + +void DisposeOutPts(OutPt *&pp) { + if (pp == 0) + return; + pp->Prev->Next = 0; + while (pp) { + OutPt *tmpPp = pp; + pp = pp->Next; + delete tmpPp; + } +} +//------------------------------------------------------------------------------ + +inline void InitEdge(TEdge *e, TEdge *eNext, TEdge *ePrev, const IntPoint &Pt) { + std::memset(e, int(0), sizeof(TEdge)); + e->Next = eNext; + e->Prev = ePrev; + e->Curr = Pt; + e->OutIdx = Unassigned; +} +//------------------------------------------------------------------------------ + +void InitEdge2(TEdge &e, PolyType Pt) { + if (e.Curr.Y >= e.Next->Curr.Y) { + e.Bot = e.Curr; + e.Top = e.Next->Curr; + } else { + e.Top = e.Curr; + e.Bot = e.Next->Curr; + } + SetDx(e); + e.PolyTyp = Pt; +} +//------------------------------------------------------------------------------ + +TEdge *RemoveEdge(TEdge *e) { + // removes e from double_linked_list (but without removing from memory) + e->Prev->Next = e->Next; + e->Next->Prev = e->Prev; + TEdge *result = e->Next; + e->Prev = 0; // flag as removed (see ClipperBase.Clear) + return result; +} +//------------------------------------------------------------------------------ + +inline void ReverseHorizontal(TEdge &e) { + // swap horizontal edges' Top and Bottom x's so they follow the natural + // progression of the bounds - ie so their xbots will align with the + // adjoining lower edge. [Helpful in the ProcessHorizontal() method.] + std::swap(e.Top.X, e.Bot.X); +#ifdef use_xyz + std::swap(e.Top.Z, e.Bot.Z); +#endif +} +//------------------------------------------------------------------------------ + +void SwapPoints(IntPoint &pt1, IntPoint &pt2) { + IntPoint tmp = pt1; + pt1 = pt2; + pt2 = tmp; +} +//------------------------------------------------------------------------------ + +bool GetOverlapSegment(IntPoint pt1a, IntPoint pt1b, IntPoint pt2a, + IntPoint pt2b, IntPoint &pt1, IntPoint &pt2) { + // precondition: segments are Collinear. 
+ if (Abs(pt1a.X - pt1b.X) > Abs(pt1a.Y - pt1b.Y)) { + if (pt1a.X > pt1b.X) + SwapPoints(pt1a, pt1b); + if (pt2a.X > pt2b.X) + SwapPoints(pt2a, pt2b); + if (pt1a.X > pt2a.X) + pt1 = pt1a; + else + pt1 = pt2a; + if (pt1b.X < pt2b.X) + pt2 = pt1b; + else + pt2 = pt2b; + return pt1.X < pt2.X; + } else { + if (pt1a.Y < pt1b.Y) + SwapPoints(pt1a, pt1b); + if (pt2a.Y < pt2b.Y) + SwapPoints(pt2a, pt2b); + if (pt1a.Y < pt2a.Y) + pt1 = pt1a; + else + pt1 = pt2a; + if (pt1b.Y > pt2b.Y) + pt2 = pt1b; + else + pt2 = pt2b; + return pt1.Y > pt2.Y; + } +} +//------------------------------------------------------------------------------ + +bool FirstIsBottomPt(const OutPt *btmPt1, const OutPt *btmPt2) { + OutPt *p = btmPt1->Prev; + while ((p->Pt == btmPt1->Pt) && (p != btmPt1)) + p = p->Prev; + double dx1p = std::fabs(GetDx(btmPt1->Pt, p->Pt)); + p = btmPt1->Next; + while ((p->Pt == btmPt1->Pt) && (p != btmPt1)) + p = p->Next; + double dx1n = std::fabs(GetDx(btmPt1->Pt, p->Pt)); + + p = btmPt2->Prev; + while ((p->Pt == btmPt2->Pt) && (p != btmPt2)) + p = p->Prev; + double dx2p = std::fabs(GetDx(btmPt2->Pt, p->Pt)); + p = btmPt2->Next; + while ((p->Pt == btmPt2->Pt) && (p != btmPt2)) + p = p->Next; + double dx2n = std::fabs(GetDx(btmPt2->Pt, p->Pt)); + + if (std::max(dx1p, dx1n) == std::max(dx2p, dx2n) && + std::min(dx1p, dx1n) == std::min(dx2p, dx2n)) + return Area(btmPt1) > 0; // if otherwise identical use orientation + else + return (dx1p >= dx2p && dx1p >= dx2n) || (dx1n >= dx2p && dx1n >= dx2n); +} +//------------------------------------------------------------------------------ + +OutPt *GetBottomPt(OutPt *pp) { + OutPt *dups = 0; + OutPt *p = pp->Next; + while (p != pp) { + if (p->Pt.Y > pp->Pt.Y) { + pp = p; + dups = 0; + } else if (p->Pt.Y == pp->Pt.Y && p->Pt.X <= pp->Pt.X) { + if (p->Pt.X < pp->Pt.X) { + dups = 0; + pp = p; + } else { + if (p->Next != pp && p->Prev != pp) + dups = p; + } + } + p = p->Next; + } + if (dups) { + // there appears to be at least 2 vertices at BottomPt so ... + while (dups != p) { + if (!FirstIsBottomPt(p, dups)) + pp = dups; + dups = dups->Next; + while (dups->Pt != pp->Pt) + dups = dups->Next; + } + } + return pp; +} +//------------------------------------------------------------------------------ + +bool Pt2IsBetweenPt1AndPt3(const IntPoint pt1, const IntPoint pt2, + const IntPoint pt3) { + if ((pt1 == pt3) || (pt1 == pt2) || (pt3 == pt2)) + return false; + else if (pt1.X != pt3.X) + return (pt2.X > pt1.X) == (pt2.X < pt3.X); + else + return (pt2.Y > pt1.Y) == (pt2.Y < pt3.Y); +} +//------------------------------------------------------------------------------ + +bool HorzSegmentsOverlap(cInt seg1a, cInt seg1b, cInt seg2a, cInt seg2b) { + if (seg1a > seg1b) + std::swap(seg1a, seg1b); + if (seg2a > seg2b) + std::swap(seg2a, seg2b); + return (seg1a < seg2b) && (seg2a < seg1b); +} + +//------------------------------------------------------------------------------ +// ClipperBase class methods ... 
+//------------------------------------------------------------------------------ + +ClipperBase::ClipperBase() // constructor +{ + m_CurrentLM = m_MinimaList.begin(); // begin() == end() here + m_UseFullRange = false; +} +//------------------------------------------------------------------------------ + +ClipperBase::~ClipperBase() // destructor +{ + Clear(); +} +//------------------------------------------------------------------------------ + +void RangeTest(const IntPoint &Pt, bool &useFullRange) { + if (useFullRange) { + if (Pt.X > hiRange || Pt.Y > hiRange || -Pt.X > hiRange || -Pt.Y > hiRange) + throw clipperException("Coordinate outside allowed range"); + } else if (Pt.X > loRange || Pt.Y > loRange || -Pt.X > loRange || + -Pt.Y > loRange) { + useFullRange = true; + RangeTest(Pt, useFullRange); + } +} +//------------------------------------------------------------------------------ + +TEdge *FindNextLocMin(TEdge *E) { + for (;;) { + while (E->Bot != E->Prev->Bot || E->Curr == E->Top) + E = E->Next; + if (!IsHorizontal(*E) && !IsHorizontal(*E->Prev)) + break; + while (IsHorizontal(*E->Prev)) + E = E->Prev; + TEdge *E2 = E; + while (IsHorizontal(*E)) + E = E->Next; + if (E->Top.Y == E->Prev->Bot.Y) + continue; // ie just an intermediate horz. + if (E2->Prev->Bot.X < E->Bot.X) + E = E2; + break; + } + return E; +} +//------------------------------------------------------------------------------ + +TEdge *ClipperBase::ProcessBound(TEdge *E, bool NextIsForward) { + TEdge *Result = E; + TEdge *Horz = 0; + + if (E->OutIdx == Skip) { + // if edges still remain in the current bound beyond the skip edge then + // create another LocMin and call ProcessBound once more + if (NextIsForward) { + while (E->Top.Y == E->Next->Bot.Y) + E = E->Next; + // don't include top horizontals when parsing a bound a second time, + // they will be contained in the opposite bound ... + while (E != Result && IsHorizontal(*E)) + E = E->Prev; + } else { + while (E->Top.Y == E->Prev->Bot.Y) + E = E->Prev; + while (E != Result && IsHorizontal(*E)) + E = E->Next; + } + + if (E == Result) { + if (NextIsForward) + Result = E->Next; + else + Result = E->Prev; + } else { + // there are more edges in the bound beyond result starting with E + if (NextIsForward) + E = Result->Next; + else + E = Result->Prev; + MinimaList::value_type locMin; + locMin.Y = E->Bot.Y; + locMin.LeftBound = 0; + locMin.RightBound = E; + E->WindDelta = 0; + Result = ProcessBound(E, NextIsForward); + m_MinimaList.push_back(locMin); + } + return Result; + } + + TEdge *EStart; + + if (IsHorizontal(*E)) { + // We need to be careful with open paths because this may not be a + // true local minima (ie E may be following a skip edge). + // Also, consecutive horz. edges may start heading left before going right. 
+ if (NextIsForward) + EStart = E->Prev; + else + EStart = E->Next; + if (IsHorizontal(*EStart)) // ie an adjoining horizontal skip edge + { + if (EStart->Bot.X != E->Bot.X && EStart->Top.X != E->Bot.X) + ReverseHorizontal(*E); + } else if (EStart->Bot.X != E->Bot.X) + ReverseHorizontal(*E); + } + + EStart = E; + if (NextIsForward) { + while (Result->Top.Y == Result->Next->Bot.Y && Result->Next->OutIdx != Skip) + Result = Result->Next; + if (IsHorizontal(*Result) && Result->Next->OutIdx != Skip) { + // nb: at the top of a bound, horizontals are added to the bound + // only when the preceding edge attaches to the horizontal's left vertex + // unless a Skip edge is encountered when that becomes the top divide + Horz = Result; + while (IsHorizontal(*Horz->Prev)) + Horz = Horz->Prev; + if (Horz->Prev->Top.X > Result->Next->Top.X) + Result = Horz->Prev; + } + while (E != Result) { + E->NextInLML = E->Next; + if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Prev->Top.X) + ReverseHorizontal(*E); + E = E->Next; + } + if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Prev->Top.X) + ReverseHorizontal(*E); + Result = Result->Next; // move to the edge just beyond current bound + } else { + while (Result->Top.Y == Result->Prev->Bot.Y && Result->Prev->OutIdx != Skip) + Result = Result->Prev; + if (IsHorizontal(*Result) && Result->Prev->OutIdx != Skip) { + Horz = Result; + while (IsHorizontal(*Horz->Next)) + Horz = Horz->Next; + if (Horz->Next->Top.X == Result->Prev->Top.X || + Horz->Next->Top.X > Result->Prev->Top.X) + Result = Horz->Next; + } + + while (E != Result) { + E->NextInLML = E->Prev; + if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Next->Top.X) + ReverseHorizontal(*E); + E = E->Prev; + } + if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Next->Top.X) + ReverseHorizontal(*E); + Result = Result->Prev; // move to the edge just beyond current bound + } + + return Result; +} +//------------------------------------------------------------------------------ + +bool ClipperBase::AddPath(const Path &pg, PolyType PolyTyp, bool Closed) { +#ifdef use_lines + if (!Closed && PolyTyp == ptClip) + throw clipperException("AddPath: Open paths must be subject."); +#else + if (!Closed) + throw clipperException("AddPath: Open paths have been disabled."); +#endif + + int highI = (int)pg.size() - 1; + if (Closed) + while (highI > 0 && (pg[highI] == pg[0])) + --highI; + while (highI > 0 && (pg[highI] == pg[highI - 1])) + --highI; + if ((Closed && highI < 2) || (!Closed && highI < 1)) + return false; + + // create a new edge array ... + TEdge *edges = new TEdge[highI + 1]; + + bool IsFlat = true; + // 1. Basic (first) edge initialization ... + try { + edges[1].Curr = pg[1]; + RangeTest(pg[0], m_UseFullRange); + RangeTest(pg[highI], m_UseFullRange); + InitEdge(&edges[0], &edges[1], &edges[highI], pg[0]); + InitEdge(&edges[highI], &edges[0], &edges[highI - 1], pg[highI]); + for (int i = highI - 1; i >= 1; --i) { + RangeTest(pg[i], m_UseFullRange); + InitEdge(&edges[i], &edges[i + 1], &edges[i - 1], pg[i]); + } + } catch (...) { + delete[] edges; + throw; // range test fails + } + TEdge *eStart = &edges[0]; + + // 2. Remove duplicate vertices, and (when closed) collinear edges ... + TEdge *E = eStart, *eLoopStop = eStart; + for (;;) { + // nb: allows matching start and end points when not Closed ... 
+ if (E->Curr == E->Next->Curr && (Closed || E->Next != eStart)) { + if (E == E->Next) + break; + if (E == eStart) + eStart = E->Next; + E = RemoveEdge(E); + eLoopStop = E; + continue; + } + if (E->Prev == E->Next) + break; // only two vertices + else if (Closed && + SlopesEqual(E->Prev->Curr, E->Curr, E->Next->Curr, + m_UseFullRange) && + (!m_PreserveCollinear || + !Pt2IsBetweenPt1AndPt3(E->Prev->Curr, E->Curr, E->Next->Curr))) { + // Collinear edges are allowed for open paths but in closed paths + // the default is to merge adjacent collinear edges into a single edge. + // However, if the PreserveCollinear property is enabled, only overlapping + // collinear edges (ie spikes) will be removed from closed paths. + if (E == eStart) + eStart = E->Next; + E = RemoveEdge(E); + E = E->Prev; + eLoopStop = E; + continue; + } + E = E->Next; + if ((E == eLoopStop) || (!Closed && E->Next == eStart)) + break; + } + + if ((!Closed && (E == E->Next)) || (Closed && (E->Prev == E->Next))) { + delete[] edges; + return false; + } + + if (!Closed) { + m_HasOpenPaths = true; + eStart->Prev->OutIdx = Skip; + } + + // 3. Do second stage of edge initialization ... + E = eStart; + do { + InitEdge2(*E, PolyTyp); + E = E->Next; + if (IsFlat && E->Curr.Y != eStart->Curr.Y) + IsFlat = false; + } while (E != eStart); + + // 4. Finally, add edge bounds to LocalMinima list ... + + // Totally flat paths must be handled differently when adding them + // to LocalMinima list to avoid endless loops etc ... + if (IsFlat) { + if (Closed) { + delete[] edges; + return false; + } + E->Prev->OutIdx = Skip; + MinimaList::value_type locMin; + locMin.Y = E->Bot.Y; + locMin.LeftBound = 0; + locMin.RightBound = E; + locMin.RightBound->Side = esRight; + locMin.RightBound->WindDelta = 0; + for (;;) { + if (E->Bot.X != E->Prev->Top.X) + ReverseHorizontal(*E); + if (E->Next->OutIdx == Skip) + break; + E->NextInLML = E->Next; + E = E->Next; + } + m_MinimaList.push_back(locMin); + m_edges.push_back(edges); + return true; + } + + m_edges.push_back(edges); + bool leftBoundIsForward; + TEdge *EMin = 0; + + // workaround to avoid an endless loop in the while loop below when + // open paths have matching start and end points ... + if (E->Prev->Bot == E->Prev->Top) + E = E->Next; + + for (;;) { + E = FindNextLocMin(E); + if (E == EMin) + break; + else if (!EMin) + EMin = E; + + // E and E.Prev now share a local minima (left aligned if horizontal). + // Compare their slopes to find which starts which bound ... 
+ MinimaList::value_type locMin; + locMin.Y = E->Bot.Y; + if (E->Dx < E->Prev->Dx) { + locMin.LeftBound = E->Prev; + locMin.RightBound = E; + leftBoundIsForward = false; // Q.nextInLML = Q.prev + } else { + locMin.LeftBound = E; + locMin.RightBound = E->Prev; + leftBoundIsForward = true; // Q.nextInLML = Q.next + } + + if (!Closed) + locMin.LeftBound->WindDelta = 0; + else if (locMin.LeftBound->Next == locMin.RightBound) + locMin.LeftBound->WindDelta = -1; + else + locMin.LeftBound->WindDelta = 1; + locMin.RightBound->WindDelta = -locMin.LeftBound->WindDelta; + + E = ProcessBound(locMin.LeftBound, leftBoundIsForward); + if (E->OutIdx == Skip) + E = ProcessBound(E, leftBoundIsForward); + + TEdge *E2 = ProcessBound(locMin.RightBound, !leftBoundIsForward); + if (E2->OutIdx == Skip) + E2 = ProcessBound(E2, !leftBoundIsForward); + + if (locMin.LeftBound->OutIdx == Skip) + locMin.LeftBound = 0; + else if (locMin.RightBound->OutIdx == Skip) + locMin.RightBound = 0; + m_MinimaList.push_back(locMin); + if (!leftBoundIsForward) + E = E2; + } + return true; +} +//------------------------------------------------------------------------------ + +bool ClipperBase::AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed) { + bool result = false; + for (Paths::size_type i = 0; i < ppg.size(); ++i) + if (AddPath(ppg[i], PolyTyp, Closed)) + result = true; + return result; +} +//------------------------------------------------------------------------------ + +void ClipperBase::Clear() { + DisposeLocalMinimaList(); + for (EdgeList::size_type i = 0; i < m_edges.size(); ++i) { + TEdge *edges = m_edges[i]; + delete[] edges; + } + m_edges.clear(); + m_UseFullRange = false; + m_HasOpenPaths = false; +} +//------------------------------------------------------------------------------ + +void ClipperBase::Reset() { + m_CurrentLM = m_MinimaList.begin(); + if (m_CurrentLM == m_MinimaList.end()) + return; // ie nothing to process + std::sort(m_MinimaList.begin(), m_MinimaList.end(), LocMinSorter()); + + m_Scanbeam = ScanbeamList(); // clears/resets priority_queue + // reset all edges ... 
+ for (MinimaList::iterator lm = m_MinimaList.begin(); lm != m_MinimaList.end(); + ++lm) { + InsertScanbeam(lm->Y); + TEdge *e = lm->LeftBound; + if (e) { + e->Curr = e->Bot; + e->Side = esLeft; + e->OutIdx = Unassigned; + } + + e = lm->RightBound; + if (e) { + e->Curr = e->Bot; + e->Side = esRight; + e->OutIdx = Unassigned; + } + } + m_ActiveEdges = 0; + m_CurrentLM = m_MinimaList.begin(); +} +//------------------------------------------------------------------------------ + +void ClipperBase::DisposeLocalMinimaList() { + m_MinimaList.clear(); + m_CurrentLM = m_MinimaList.begin(); +} +//------------------------------------------------------------------------------ + +bool ClipperBase::PopLocalMinima(cInt Y, const LocalMinimum *&locMin) { + if (m_CurrentLM == m_MinimaList.end() || (*m_CurrentLM).Y != Y) + return false; + locMin = &(*m_CurrentLM); + ++m_CurrentLM; + return true; +} +//------------------------------------------------------------------------------ + +IntRect ClipperBase::GetBounds() { + IntRect result; + MinimaList::iterator lm = m_MinimaList.begin(); + if (lm == m_MinimaList.end()) { + result.left = result.top = result.right = result.bottom = 0; + return result; + } + result.left = lm->LeftBound->Bot.X; + result.top = lm->LeftBound->Bot.Y; + result.right = lm->LeftBound->Bot.X; + result.bottom = lm->LeftBound->Bot.Y; + while (lm != m_MinimaList.end()) { + // todo - needs fixing for open paths + result.bottom = std::max(result.bottom, lm->LeftBound->Bot.Y); + TEdge *e = lm->LeftBound; + for (;;) { + TEdge *bottomE = e; + while (e->NextInLML) { + if (e->Bot.X < result.left) + result.left = e->Bot.X; + if (e->Bot.X > result.right) + result.right = e->Bot.X; + e = e->NextInLML; + } + result.left = std::min(result.left, e->Bot.X); + result.right = std::max(result.right, e->Bot.X); + result.left = std::min(result.left, e->Top.X); + result.right = std::max(result.right, e->Top.X); + result.top = std::min(result.top, e->Top.Y); + if (bottomE == lm->LeftBound) + e = lm->RightBound; + else + break; + } + ++lm; + } + return result; +} +//------------------------------------------------------------------------------ + +void ClipperBase::InsertScanbeam(const cInt Y) { m_Scanbeam.push(Y); } +//------------------------------------------------------------------------------ + +bool ClipperBase::PopScanbeam(cInt &Y) { + if (m_Scanbeam.empty()) + return false; + Y = m_Scanbeam.top(); + m_Scanbeam.pop(); + while (!m_Scanbeam.empty() && Y == m_Scanbeam.top()) { + m_Scanbeam.pop(); + } // Pop duplicates. 
+ return true; +} +//------------------------------------------------------------------------------ + +void ClipperBase::DisposeAllOutRecs() { + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) + DisposeOutRec(i); + m_PolyOuts.clear(); +} +//------------------------------------------------------------------------------ + +void ClipperBase::DisposeOutRec(PolyOutList::size_type index) { + OutRec *outRec = m_PolyOuts[index]; + if (outRec->Pts) + DisposeOutPts(outRec->Pts); + delete outRec; + m_PolyOuts[index] = 0; +} +//------------------------------------------------------------------------------ + +void ClipperBase::DeleteFromAEL(TEdge *e) { + TEdge *AelPrev = e->PrevInAEL; + TEdge *AelNext = e->NextInAEL; + if (!AelPrev && !AelNext && (e != m_ActiveEdges)) + return; // already deleted + if (AelPrev) + AelPrev->NextInAEL = AelNext; + else + m_ActiveEdges = AelNext; + if (AelNext) + AelNext->PrevInAEL = AelPrev; + e->NextInAEL = 0; + e->PrevInAEL = 0; +} +//------------------------------------------------------------------------------ + +OutRec *ClipperBase::CreateOutRec() { + OutRec *result = new OutRec; + result->IsHole = false; + result->IsOpen = false; + result->FirstLeft = 0; + result->Pts = 0; + result->BottomPt = 0; + result->PolyNd = 0; + m_PolyOuts.push_back(result); + result->Idx = (int)m_PolyOuts.size() - 1; + return result; +} +//------------------------------------------------------------------------------ + +void ClipperBase::SwapPositionsInAEL(TEdge *Edge1, TEdge *Edge2) { + // check that one or other edge hasn't already been removed from AEL ... + if (Edge1->NextInAEL == Edge1->PrevInAEL || + Edge2->NextInAEL == Edge2->PrevInAEL) + return; + + if (Edge1->NextInAEL == Edge2) { + TEdge *Next = Edge2->NextInAEL; + if (Next) + Next->PrevInAEL = Edge1; + TEdge *Prev = Edge1->PrevInAEL; + if (Prev) + Prev->NextInAEL = Edge2; + Edge2->PrevInAEL = Prev; + Edge2->NextInAEL = Edge1; + Edge1->PrevInAEL = Edge2; + Edge1->NextInAEL = Next; + } else if (Edge2->NextInAEL == Edge1) { + TEdge *Next = Edge1->NextInAEL; + if (Next) + Next->PrevInAEL = Edge2; + TEdge *Prev = Edge2->PrevInAEL; + if (Prev) + Prev->NextInAEL = Edge1; + Edge1->PrevInAEL = Prev; + Edge1->NextInAEL = Edge2; + Edge2->PrevInAEL = Edge1; + Edge2->NextInAEL = Next; + } else { + TEdge *Next = Edge1->NextInAEL; + TEdge *Prev = Edge1->PrevInAEL; + Edge1->NextInAEL = Edge2->NextInAEL; + if (Edge1->NextInAEL) + Edge1->NextInAEL->PrevInAEL = Edge1; + Edge1->PrevInAEL = Edge2->PrevInAEL; + if (Edge1->PrevInAEL) + Edge1->PrevInAEL->NextInAEL = Edge1; + Edge2->NextInAEL = Next; + if (Edge2->NextInAEL) + Edge2->NextInAEL->PrevInAEL = Edge2; + Edge2->PrevInAEL = Prev; + if (Edge2->PrevInAEL) + Edge2->PrevInAEL->NextInAEL = Edge2; + } + + if (!Edge1->PrevInAEL) + m_ActiveEdges = Edge1; + else if (!Edge2->PrevInAEL) + m_ActiveEdges = Edge2; +} +//------------------------------------------------------------------------------ + +void ClipperBase::UpdateEdgeIntoAEL(TEdge *&e) { + if (!e->NextInLML) + throw clipperException("UpdateEdgeIntoAEL: invalid call"); + + e->NextInLML->OutIdx = e->OutIdx; + TEdge *AelPrev = e->PrevInAEL; + TEdge *AelNext = e->NextInAEL; + if (AelPrev) + AelPrev->NextInAEL = e->NextInLML; + else + m_ActiveEdges = e->NextInLML; + if (AelNext) + AelNext->PrevInAEL = e->NextInLML; + e->NextInLML->Side = e->Side; + e->NextInLML->WindDelta = e->WindDelta; + e->NextInLML->WindCnt = e->WindCnt; + e->NextInLML->WindCnt2 = e->WindCnt2; + e = e->NextInLML; + e->Curr = e->Bot; + e->PrevInAEL = AelPrev; + 
e->NextInAEL = AelNext; + if (!IsHorizontal(*e)) + InsertScanbeam(e->Top.Y); +} +//------------------------------------------------------------------------------ + +bool ClipperBase::LocalMinimaPending() { + return (m_CurrentLM != m_MinimaList.end()); +} + +//------------------------------------------------------------------------------ +// TClipper methods ... +//------------------------------------------------------------------------------ + +Clipper::Clipper(int initOptions) + : ClipperBase() // constructor +{ + m_ExecuteLocked = false; + m_UseFullRange = false; + m_ReverseOutput = ((initOptions & ioReverseSolution) != 0); + m_StrictSimple = ((initOptions & ioStrictlySimple) != 0); + m_PreserveCollinear = ((initOptions & ioPreserveCollinear) != 0); + m_HasOpenPaths = false; +#ifdef use_xyz + m_ZFill = 0; +#endif +} +//------------------------------------------------------------------------------ + +#ifdef use_xyz +void Clipper::ZFillFunction(ZFillCallback zFillFunc) { m_ZFill = zFillFunc; } +//------------------------------------------------------------------------------ +#endif + +bool Clipper::Execute(ClipType clipType, Paths &solution, + PolyFillType fillType) { + return Execute(clipType, solution, fillType, fillType); +} +//------------------------------------------------------------------------------ + +bool Clipper::Execute(ClipType clipType, PolyTree &polytree, + PolyFillType fillType) { + return Execute(clipType, polytree, fillType, fillType); +} +//------------------------------------------------------------------------------ + +bool Clipper::Execute(ClipType clipType, Paths &solution, + PolyFillType subjFillType, PolyFillType clipFillType) { + if (m_ExecuteLocked) + return false; + if (m_HasOpenPaths) + throw clipperException( + "Error: PolyTree struct is needed for open path clipping."); + m_ExecuteLocked = true; + solution.resize(0); + m_SubjFillType = subjFillType; + m_ClipFillType = clipFillType; + m_ClipType = clipType; + m_UsingPolyTree = false; + bool succeeded = ExecuteInternal(); + if (succeeded) + BuildResult(solution); + DisposeAllOutRecs(); + m_ExecuteLocked = false; + return succeeded; +} +//------------------------------------------------------------------------------ + +bool Clipper::Execute(ClipType clipType, PolyTree &polytree, + PolyFillType subjFillType, PolyFillType clipFillType) { + if (m_ExecuteLocked) + return false; + m_ExecuteLocked = true; + m_SubjFillType = subjFillType; + m_ClipFillType = clipFillType; + m_ClipType = clipType; + m_UsingPolyTree = true; + bool succeeded = ExecuteInternal(); + if (succeeded) + BuildResult2(polytree); + DisposeAllOutRecs(); + m_ExecuteLocked = false; + return succeeded; +} +//------------------------------------------------------------------------------ + +void Clipper::FixHoleLinkage(OutRec &outrec) { + // skip OutRecs that (a) contain outermost polygons or + //(b) already have the correct owner/child linkage ... 
+ if (!outrec.FirstLeft || + (outrec.IsHole != outrec.FirstLeft->IsHole && outrec.FirstLeft->Pts)) + return; + + OutRec *orfl = outrec.FirstLeft; + while (orfl && ((orfl->IsHole == outrec.IsHole) || !orfl->Pts)) + orfl = orfl->FirstLeft; + outrec.FirstLeft = orfl; +} +//------------------------------------------------------------------------------ + +bool Clipper::ExecuteInternal() { + bool succeeded = true; + try { + Reset(); + m_Maxima = MaximaList(); + m_SortedEdges = 0; + + succeeded = true; + cInt botY, topY; + if (!PopScanbeam(botY)) + return false; + InsertLocalMinimaIntoAEL(botY); + while (PopScanbeam(topY) || LocalMinimaPending()) { + ProcessHorizontals(); + ClearGhostJoins(); + if (!ProcessIntersections(topY)) { + succeeded = false; + break; + } + ProcessEdgesAtTopOfScanbeam(topY); + botY = topY; + InsertLocalMinimaIntoAEL(botY); + } + } catch (...) { + succeeded = false; + } + + if (succeeded) { + // fix orientations ... + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + OutRec *outRec = m_PolyOuts[i]; + if (!outRec->Pts || outRec->IsOpen) + continue; + if ((outRec->IsHole ^ m_ReverseOutput) == (Area(*outRec) > 0)) + ReversePolyPtLinks(outRec->Pts); + } + + if (!m_Joins.empty()) + JoinCommonEdges(); + + // unfortunately FixupOutPolygon() must be done after JoinCommonEdges() + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + OutRec *outRec = m_PolyOuts[i]; + if (!outRec->Pts) + continue; + if (outRec->IsOpen) + FixupOutPolyline(*outRec); + else + FixupOutPolygon(*outRec); + } + + if (m_StrictSimple) + DoSimplePolygons(); + } + + ClearJoins(); + ClearGhostJoins(); + return succeeded; +} +//------------------------------------------------------------------------------ + +void Clipper::SetWindingCount(TEdge &edge) { + TEdge *e = edge.PrevInAEL; + // find the edge of the same polytype that immediately preceeds 'edge' in AEL + while (e && ((e->PolyTyp != edge.PolyTyp) || (e->WindDelta == 0))) + e = e->PrevInAEL; + if (!e) { + if (edge.WindDelta == 0) { + PolyFillType pft = + (edge.PolyTyp == ptSubject ? m_SubjFillType : m_ClipFillType); + edge.WindCnt = (pft == pftNegative ? -1 : 1); + } else + edge.WindCnt = edge.WindDelta; + edge.WindCnt2 = 0; + e = m_ActiveEdges; // ie get ready to calc WindCnt2 + } else if (edge.WindDelta == 0 && m_ClipType != ctUnion) { + edge.WindCnt = 1; + edge.WindCnt2 = e->WindCnt2; + e = e->NextInAEL; // ie get ready to calc WindCnt2 + } else if (IsEvenOddFillType(edge)) { + // EvenOdd filling ... + if (edge.WindDelta == 0) { + // are we inside a subj polygon ... + bool Inside = true; + TEdge *e2 = e->PrevInAEL; + while (e2) { + if (e2->PolyTyp == e->PolyTyp && e2->WindDelta != 0) + Inside = !Inside; + e2 = e2->PrevInAEL; + } + edge.WindCnt = (Inside ? 0 : 1); + } else { + edge.WindCnt = edge.WindDelta; + } + edge.WindCnt2 = e->WindCnt2; + e = e->NextInAEL; // ie get ready to calc WindCnt2 + } else { + // nonZero, Positive or Negative filling ... + if (e->WindCnt * e->WindDelta < 0) { + // prev edge is 'decreasing' WindCount (WC) toward zero + // so we're outside the previous polygon ... + if (Abs(e->WindCnt) > 1) { + // outside prev poly but still inside another. + // when reversing direction of prev poly use the same WC + if (e->WindDelta * edge.WindDelta < 0) + edge.WindCnt = e->WindCnt; + // otherwise continue to 'decrease' WC ... + else + edge.WindCnt = e->WindCnt + edge.WindDelta; + } else + // now outside all polys of same polytype so set own WC ... + edge.WindCnt = (edge.WindDelta == 0 ? 
1 : edge.WindDelta); + } else { + // prev edge is 'increasing' WindCount (WC) away from zero + // so we're inside the previous polygon ... + if (edge.WindDelta == 0) + edge.WindCnt = (e->WindCnt < 0 ? e->WindCnt - 1 : e->WindCnt + 1); + // if wind direction is reversing prev then use same WC + else if (e->WindDelta * edge.WindDelta < 0) + edge.WindCnt = e->WindCnt; + // otherwise add to WC ... + else + edge.WindCnt = e->WindCnt + edge.WindDelta; + } + edge.WindCnt2 = e->WindCnt2; + e = e->NextInAEL; // ie get ready to calc WindCnt2 + } + + // update WindCnt2 ... + if (IsEvenOddAltFillType(edge)) { + // EvenOdd filling ... + while (e != &edge) { + if (e->WindDelta != 0) + edge.WindCnt2 = (edge.WindCnt2 == 0 ? 1 : 0); + e = e->NextInAEL; + } + } else { + // nonZero, Positive or Negative filling ... + while (e != &edge) { + edge.WindCnt2 += e->WindDelta; + e = e->NextInAEL; + } + } +} +//------------------------------------------------------------------------------ + +bool Clipper::IsEvenOddFillType(const TEdge &edge) const { + if (edge.PolyTyp == ptSubject) + return m_SubjFillType == pftEvenOdd; + else + return m_ClipFillType == pftEvenOdd; +} +//------------------------------------------------------------------------------ + +bool Clipper::IsEvenOddAltFillType(const TEdge &edge) const { + if (edge.PolyTyp == ptSubject) + return m_ClipFillType == pftEvenOdd; + else + return m_SubjFillType == pftEvenOdd; +} +//------------------------------------------------------------------------------ + +bool Clipper::IsContributing(const TEdge &edge) const { + PolyFillType pft, pft2; + if (edge.PolyTyp == ptSubject) { + pft = m_SubjFillType; + pft2 = m_ClipFillType; + } else { + pft = m_ClipFillType; + pft2 = m_SubjFillType; + } + + switch (pft) { + case pftEvenOdd: + // return false if a subj line has been flagged as inside a subj polygon + if (edge.WindDelta == 0 && edge.WindCnt != 1) + return false; + break; + case pftNonZero: + if (Abs(edge.WindCnt) != 1) + return false; + break; + case pftPositive: + if (edge.WindCnt != 1) + return false; + break; + default: // pftNegative + if (edge.WindCnt != -1) + return false; + } + + switch (m_ClipType) { + case ctIntersection: + switch (pft2) { + case pftEvenOdd: + case pftNonZero: + return (edge.WindCnt2 != 0); + case pftPositive: + return (edge.WindCnt2 > 0); + default: + return (edge.WindCnt2 < 0); + } + break; + case ctUnion: + switch (pft2) { + case pftEvenOdd: + case pftNonZero: + return (edge.WindCnt2 == 0); + case pftPositive: + return (edge.WindCnt2 <= 0); + default: + return (edge.WindCnt2 >= 0); + } + break; + case ctDifference: + if (edge.PolyTyp == ptSubject) + switch (pft2) { + case pftEvenOdd: + case pftNonZero: + return (edge.WindCnt2 == 0); + case pftPositive: + return (edge.WindCnt2 <= 0); + default: + return (edge.WindCnt2 >= 0); + } + else + switch (pft2) { + case pftEvenOdd: + case pftNonZero: + return (edge.WindCnt2 != 0); + case pftPositive: + return (edge.WindCnt2 > 0); + default: + return (edge.WindCnt2 < 0); + } + break; + case ctXor: + if (edge.WindDelta == 0) // XOr always contributing unless open + switch (pft2) { + case pftEvenOdd: + case pftNonZero: + return (edge.WindCnt2 == 0); + case pftPositive: + return (edge.WindCnt2 <= 0); + default: + return (edge.WindCnt2 >= 0); + } + else + return true; + break; + default: + return true; + } +} +//------------------------------------------------------------------------------ + +OutPt *Clipper::AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &Pt) { + OutPt *result; + TEdge *e, 
*prevE; + if (IsHorizontal(*e2) || (e1->Dx > e2->Dx)) { + result = AddOutPt(e1, Pt); + e2->OutIdx = e1->OutIdx; + e1->Side = esLeft; + e2->Side = esRight; + e = e1; + if (e->PrevInAEL == e2) + prevE = e2->PrevInAEL; + else + prevE = e->PrevInAEL; + } else { + result = AddOutPt(e2, Pt); + e1->OutIdx = e2->OutIdx; + e1->Side = esRight; + e2->Side = esLeft; + e = e2; + if (e->PrevInAEL == e1) + prevE = e1->PrevInAEL; + else + prevE = e->PrevInAEL; + } + + if (prevE && prevE->OutIdx >= 0 && prevE->Top.Y < Pt.Y && e->Top.Y < Pt.Y) { + cInt xPrev = TopX(*prevE, Pt.Y); + cInt xE = TopX(*e, Pt.Y); + if (xPrev == xE && (e->WindDelta != 0) && (prevE->WindDelta != 0) && + SlopesEqual(IntPoint(xPrev, Pt.Y), prevE->Top, IntPoint(xE, Pt.Y), + e->Top, m_UseFullRange)) { + OutPt *outPt = AddOutPt(prevE, Pt); + AddJoin(result, outPt, e->Top); + } + } + return result; +} +//------------------------------------------------------------------------------ + +void Clipper::AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &Pt) { + AddOutPt(e1, Pt); + if (e2->WindDelta == 0) + AddOutPt(e2, Pt); + if (e1->OutIdx == e2->OutIdx) { + e1->OutIdx = Unassigned; + e2->OutIdx = Unassigned; + } else if (e1->OutIdx < e2->OutIdx) + AppendPolygon(e1, e2); + else + AppendPolygon(e2, e1); +} +//------------------------------------------------------------------------------ + +void Clipper::AddEdgeToSEL(TEdge *edge) { + // SEL pointers in PEdge are reused to build a list of horizontal edges. + // However, we don't need to worry about order with horizontal edge + // processing. + if (!m_SortedEdges) { + m_SortedEdges = edge; + edge->PrevInSEL = 0; + edge->NextInSEL = 0; + } else { + edge->NextInSEL = m_SortedEdges; + edge->PrevInSEL = 0; + m_SortedEdges->PrevInSEL = edge; + m_SortedEdges = edge; + } +} +//------------------------------------------------------------------------------ + +bool Clipper::PopEdgeFromSEL(TEdge *&edge) { + if (!m_SortedEdges) + return false; + edge = m_SortedEdges; + DeleteFromSEL(m_SortedEdges); + return true; +} +//------------------------------------------------------------------------------ + +void Clipper::CopyAELToSEL() { + TEdge *e = m_ActiveEdges; + m_SortedEdges = e; + while (e) { + e->PrevInSEL = e->PrevInAEL; + e->NextInSEL = e->NextInAEL; + e = e->NextInAEL; + } +} +//------------------------------------------------------------------------------ + +void Clipper::AddJoin(OutPt *op1, OutPt *op2, const IntPoint OffPt) { + Join *j = new Join; + j->OutPt1 = op1; + j->OutPt2 = op2; + j->OffPt = OffPt; + m_Joins.push_back(j); +} +//------------------------------------------------------------------------------ + +void Clipper::ClearJoins() { + for (JoinList::size_type i = 0; i < m_Joins.size(); i++) + delete m_Joins[i]; + m_Joins.resize(0); +} +//------------------------------------------------------------------------------ + +void Clipper::ClearGhostJoins() { + for (JoinList::size_type i = 0; i < m_GhostJoins.size(); i++) + delete m_GhostJoins[i]; + m_GhostJoins.resize(0); +} +//------------------------------------------------------------------------------ + +void Clipper::AddGhostJoin(OutPt *op, const IntPoint OffPt) { + Join *j = new Join; + j->OutPt1 = op; + j->OutPt2 = 0; + j->OffPt = OffPt; + m_GhostJoins.push_back(j); +} +//------------------------------------------------------------------------------ + +void Clipper::InsertLocalMinimaIntoAEL(const cInt botY) { + const LocalMinimum *lm; + while (PopLocalMinima(botY, lm)) { + TEdge *lb = lm->LeftBound; + TEdge *rb = lm->RightBound; + + OutPt 
*Op1 = 0; + if (!lb || !rb) { + // nb: don't insert LB into either AEL or SEL + InsertEdgeIntoAEL(rb, 0); + SetWindingCount(*rb); + if (IsContributing(*rb)) + Op1 = AddOutPt(rb, rb->Bot); + //} else if (!rb) { + // InsertEdgeIntoAEL(lb, 0); + // SetWindingCount(*lb); + // if (IsContributing(*lb)) + // Op1 = AddOutPt(lb, lb->Bot); + InsertScanbeam(lb->Top.Y); + } else { + InsertEdgeIntoAEL(lb, 0); + InsertEdgeIntoAEL(rb, lb); + SetWindingCount(*lb); + rb->WindCnt = lb->WindCnt; + rb->WindCnt2 = lb->WindCnt2; + if (IsContributing(*lb)) + Op1 = AddLocalMinPoly(lb, rb, lb->Bot); + InsertScanbeam(lb->Top.Y); + } + + if (rb) { + if (IsHorizontal(*rb)) { + AddEdgeToSEL(rb); + if (rb->NextInLML) + InsertScanbeam(rb->NextInLML->Top.Y); + } else + InsertScanbeam(rb->Top.Y); + } + + if (!lb || !rb) + continue; + + // if any output polygons share an edge, they'll need joining later ... + if (Op1 && IsHorizontal(*rb) && m_GhostJoins.size() > 0 && + (rb->WindDelta != 0)) { + for (JoinList::size_type i = 0; i < m_GhostJoins.size(); ++i) { + Join *jr = m_GhostJoins[i]; + // if the horizontal Rb and a 'ghost' horizontal overlap, then convert + // the 'ghost' join to a real join ready for later ... + if (HorzSegmentsOverlap(jr->OutPt1->Pt.X, jr->OffPt.X, rb->Bot.X, + rb->Top.X)) + AddJoin(jr->OutPt1, Op1, jr->OffPt); + } + } + + if (lb->OutIdx >= 0 && lb->PrevInAEL && + lb->PrevInAEL->Curr.X == lb->Bot.X && lb->PrevInAEL->OutIdx >= 0 && + SlopesEqual(lb->PrevInAEL->Bot, lb->PrevInAEL->Top, lb->Curr, lb->Top, + m_UseFullRange) && + (lb->WindDelta != 0) && (lb->PrevInAEL->WindDelta != 0)) { + OutPt *Op2 = AddOutPt(lb->PrevInAEL, lb->Bot); + AddJoin(Op1, Op2, lb->Top); + } + + if (lb->NextInAEL != rb) { + if (rb->OutIdx >= 0 && rb->PrevInAEL->OutIdx >= 0 && + SlopesEqual(rb->PrevInAEL->Curr, rb->PrevInAEL->Top, rb->Curr, + rb->Top, m_UseFullRange) && + (rb->WindDelta != 0) && (rb->PrevInAEL->WindDelta != 0)) { + OutPt *Op2 = AddOutPt(rb->PrevInAEL, rb->Bot); + AddJoin(Op1, Op2, rb->Top); + } + + TEdge *e = lb->NextInAEL; + if (e) { + while (e != rb) { + // nb: For calculating winding counts etc, IntersectEdges() assumes + // that param1 will be to the Right of param2 ABOVE the intersection + // ... + IntersectEdges(rb, e, lb->Curr); // order important here + e = e->NextInAEL; + } + } + } + } +} +//------------------------------------------------------------------------------ + +void Clipper::DeleteFromSEL(TEdge *e) { + TEdge *SelPrev = e->PrevInSEL; + TEdge *SelNext = e->NextInSEL; + if (!SelPrev && !SelNext && (e != m_SortedEdges)) + return; // already deleted + if (SelPrev) + SelPrev->NextInSEL = SelNext; + else + m_SortedEdges = SelNext; + if (SelNext) + SelNext->PrevInSEL = SelPrev; + e->NextInSEL = 0; + e->PrevInSEL = 0; +} +//------------------------------------------------------------------------------ + +#ifdef use_xyz +void Clipper::SetZ(IntPoint &pt, TEdge &e1, TEdge &e2) { + if (pt.Z != 0 || !m_ZFill) + return; + else if (pt == e1.Bot) + pt.Z = e1.Bot.Z; + else if (pt == e1.Top) + pt.Z = e1.Top.Z; + else if (pt == e2.Bot) + pt.Z = e2.Bot.Z; + else if (pt == e2.Top) + pt.Z = e2.Top.Z; + else + (*m_ZFill)(e1.Bot, e1.Top, e2.Bot, e2.Top, pt); +} +//------------------------------------------------------------------------------ +#endif + +void Clipper::IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &Pt) { + bool e1Contributing = (e1->OutIdx >= 0); + bool e2Contributing = (e2->OutIdx >= 0); + +#ifdef use_xyz + SetZ(Pt, *e1, *e2); +#endif + +#ifdef use_lines + // if either edge is on an OPEN path ... 
+ if (e1->WindDelta == 0 || e2->WindDelta == 0) { + // ignore subject-subject open path intersections UNLESS they + // are both open paths, AND they are both 'contributing maximas' ... + if (e1->WindDelta == 0 && e2->WindDelta == 0) + return; + + // if intersecting a subj line with a subj poly ... + else if (e1->PolyTyp == e2->PolyTyp && e1->WindDelta != e2->WindDelta && + m_ClipType == ctUnion) { + if (e1->WindDelta == 0) { + if (e2Contributing) { + AddOutPt(e1, Pt); + if (e1Contributing) + e1->OutIdx = Unassigned; + } + } else { + if (e1Contributing) { + AddOutPt(e2, Pt); + if (e2Contributing) + e2->OutIdx = Unassigned; + } + } + } else if (e1->PolyTyp != e2->PolyTyp) { + // toggle subj open path OutIdx on/off when Abs(clip.WndCnt) == 1 ... + if ((e1->WindDelta == 0) && abs(e2->WindCnt) == 1 && + (m_ClipType != ctUnion || e2->WindCnt2 == 0)) { + AddOutPt(e1, Pt); + if (e1Contributing) + e1->OutIdx = Unassigned; + } else if ((e2->WindDelta == 0) && (abs(e1->WindCnt) == 1) && + (m_ClipType != ctUnion || e1->WindCnt2 == 0)) { + AddOutPt(e2, Pt); + if (e2Contributing) + e2->OutIdx = Unassigned; + } + } + return; + } +#endif + + // update winding counts... + // assumes that e1 will be to the Right of e2 ABOVE the intersection + if (e1->PolyTyp == e2->PolyTyp) { + if (IsEvenOddFillType(*e1)) { + int oldE1WindCnt = e1->WindCnt; + e1->WindCnt = e2->WindCnt; + e2->WindCnt = oldE1WindCnt; + } else { + if (e1->WindCnt + e2->WindDelta == 0) + e1->WindCnt = -e1->WindCnt; + else + e1->WindCnt += e2->WindDelta; + if (e2->WindCnt - e1->WindDelta == 0) + e2->WindCnt = -e2->WindCnt; + else + e2->WindCnt -= e1->WindDelta; + } + } else { + if (!IsEvenOddFillType(*e2)) + e1->WindCnt2 += e2->WindDelta; + else + e1->WindCnt2 = (e1->WindCnt2 == 0) ? 1 : 0; + if (!IsEvenOddFillType(*e1)) + e2->WindCnt2 -= e1->WindDelta; + else + e2->WindCnt2 = (e2->WindCnt2 == 0) ? 1 : 0; + } + + PolyFillType e1FillType, e2FillType, e1FillType2, e2FillType2; + if (e1->PolyTyp == ptSubject) { + e1FillType = m_SubjFillType; + e1FillType2 = m_ClipFillType; + } else { + e1FillType = m_ClipFillType; + e1FillType2 = m_SubjFillType; + } + if (e2->PolyTyp == ptSubject) { + e2FillType = m_SubjFillType; + e2FillType2 = m_ClipFillType; + } else { + e2FillType = m_ClipFillType; + e2FillType2 = m_SubjFillType; + } + + cInt e1Wc, e2Wc; + switch (e1FillType) { + case pftPositive: + e1Wc = e1->WindCnt; + break; + case pftNegative: + e1Wc = -e1->WindCnt; + break; + default: + e1Wc = Abs(e1->WindCnt); + } + switch (e2FillType) { + case pftPositive: + e2Wc = e2->WindCnt; + break; + case pftNegative: + e2Wc = -e2->WindCnt; + break; + default: + e2Wc = Abs(e2->WindCnt); + } + + if (e1Contributing && e2Contributing) { + if ((e1Wc != 0 && e1Wc != 1) || (e2Wc != 0 && e2Wc != 1) || + (e1->PolyTyp != e2->PolyTyp && m_ClipType != ctXor)) { + AddLocalMaxPoly(e1, e2, Pt); + } else { + AddOutPt(e1, Pt); + AddOutPt(e2, Pt); + SwapSides(*e1, *e2); + SwapPolyIndexes(*e1, *e2); + } + } else if (e1Contributing) { + if (e2Wc == 0 || e2Wc == 1) { + AddOutPt(e1, Pt); + SwapSides(*e1, *e2); + SwapPolyIndexes(*e1, *e2); + } + } else if (e2Contributing) { + if (e1Wc == 0 || e1Wc == 1) { + AddOutPt(e2, Pt); + SwapSides(*e1, *e2); + SwapPolyIndexes(*e1, *e2); + } + } else if ((e1Wc == 0 || e1Wc == 1) && (e2Wc == 0 || e2Wc == 1)) { + // neither edge is currently contributing ... 
+ + cInt e1Wc2, e2Wc2; + switch (e1FillType2) { + case pftPositive: + e1Wc2 = e1->WindCnt2; + break; + case pftNegative: + e1Wc2 = -e1->WindCnt2; + break; + default: + e1Wc2 = Abs(e1->WindCnt2); + } + switch (e2FillType2) { + case pftPositive: + e2Wc2 = e2->WindCnt2; + break; + case pftNegative: + e2Wc2 = -e2->WindCnt2; + break; + default: + e2Wc2 = Abs(e2->WindCnt2); + } + + if (e1->PolyTyp != e2->PolyTyp) { + AddLocalMinPoly(e1, e2, Pt); + } else if (e1Wc == 1 && e2Wc == 1) + switch (m_ClipType) { + case ctIntersection: + if (e1Wc2 > 0 && e2Wc2 > 0) + AddLocalMinPoly(e1, e2, Pt); + break; + case ctUnion: + if (e1Wc2 <= 0 && e2Wc2 <= 0) + AddLocalMinPoly(e1, e2, Pt); + break; + case ctDifference: + if (((e1->PolyTyp == ptClip) && (e1Wc2 > 0) && (e2Wc2 > 0)) || + ((e1->PolyTyp == ptSubject) && (e1Wc2 <= 0) && (e2Wc2 <= 0))) + AddLocalMinPoly(e1, e2, Pt); + break; + case ctXor: + AddLocalMinPoly(e1, e2, Pt); + } + else + SwapSides(*e1, *e2); + } +} +//------------------------------------------------------------------------------ + +void Clipper::SetHoleState(TEdge *e, OutRec *outrec) { + TEdge *e2 = e->PrevInAEL; + TEdge *eTmp = 0; + while (e2) { + if (e2->OutIdx >= 0 && e2->WindDelta != 0) { + if (!eTmp) + eTmp = e2; + else if (eTmp->OutIdx == e2->OutIdx) + eTmp = 0; + } + e2 = e2->PrevInAEL; + } + if (!eTmp) { + outrec->FirstLeft = 0; + outrec->IsHole = false; + } else { + outrec->FirstLeft = m_PolyOuts[eTmp->OutIdx]; + outrec->IsHole = !outrec->FirstLeft->IsHole; + } +} +//------------------------------------------------------------------------------ + +OutRec *GetLowermostRec(OutRec *outRec1, OutRec *outRec2) { + // work out which polygon fragment has the correct hole state ... + if (!outRec1->BottomPt) + outRec1->BottomPt = GetBottomPt(outRec1->Pts); + if (!outRec2->BottomPt) + outRec2->BottomPt = GetBottomPt(outRec2->Pts); + OutPt *OutPt1 = outRec1->BottomPt; + OutPt *OutPt2 = outRec2->BottomPt; + if (OutPt1->Pt.Y > OutPt2->Pt.Y) + return outRec1; + else if (OutPt1->Pt.Y < OutPt2->Pt.Y) + return outRec2; + else if (OutPt1->Pt.X < OutPt2->Pt.X) + return outRec1; + else if (OutPt1->Pt.X > OutPt2->Pt.X) + return outRec2; + else if (OutPt1->Next == OutPt1) + return outRec2; + else if (OutPt2->Next == OutPt2) + return outRec1; + else if (FirstIsBottomPt(OutPt1, OutPt2)) + return outRec1; + else + return outRec2; +} +//------------------------------------------------------------------------------ + +bool OutRec1RightOfOutRec2(OutRec *outRec1, OutRec *outRec2) { + do { + outRec1 = outRec1->FirstLeft; + if (outRec1 == outRec2) + return true; + } while (outRec1); + return false; +} +//------------------------------------------------------------------------------ + +OutRec *Clipper::GetOutRec(int Idx) { + OutRec *outrec = m_PolyOuts[Idx]; + while (outrec != m_PolyOuts[outrec->Idx]) + outrec = m_PolyOuts[outrec->Idx]; + return outrec; +} +//------------------------------------------------------------------------------ + +void Clipper::AppendPolygon(TEdge *e1, TEdge *e2) { + // get the start and ends of both output polygons ... + OutRec *outRec1 = m_PolyOuts[e1->OutIdx]; + OutRec *outRec2 = m_PolyOuts[e2->OutIdx]; + + OutRec *holeStateRec; + if (OutRec1RightOfOutRec2(outRec1, outRec2)) + holeStateRec = outRec2; + else if (OutRec1RightOfOutRec2(outRec2, outRec1)) + holeStateRec = outRec1; + else + holeStateRec = GetLowermostRec(outRec1, outRec2); + + // get the start and ends of both output polygons and + // join e2 poly onto e1 poly and delete pointers to e2 ... 
+ + OutPt *p1_lft = outRec1->Pts; + OutPt *p1_rt = p1_lft->Prev; + OutPt *p2_lft = outRec2->Pts; + OutPt *p2_rt = p2_lft->Prev; + + // join e2 poly onto e1 poly and delete pointers to e2 ... + if (e1->Side == esLeft) { + if (e2->Side == esLeft) { + // z y x a b c + ReversePolyPtLinks(p2_lft); + p2_lft->Next = p1_lft; + p1_lft->Prev = p2_lft; + p1_rt->Next = p2_rt; + p2_rt->Prev = p1_rt; + outRec1->Pts = p2_rt; + } else { + // x y z a b c + p2_rt->Next = p1_lft; + p1_lft->Prev = p2_rt; + p2_lft->Prev = p1_rt; + p1_rt->Next = p2_lft; + outRec1->Pts = p2_lft; + } + } else { + if (e2->Side == esRight) { + // a b c z y x + ReversePolyPtLinks(p2_lft); + p1_rt->Next = p2_rt; + p2_rt->Prev = p1_rt; + p2_lft->Next = p1_lft; + p1_lft->Prev = p2_lft; + } else { + // a b c x y z + p1_rt->Next = p2_lft; + p2_lft->Prev = p1_rt; + p1_lft->Prev = p2_rt; + p2_rt->Next = p1_lft; + } + } + + outRec1->BottomPt = 0; + if (holeStateRec == outRec2) { + if (outRec2->FirstLeft != outRec1) + outRec1->FirstLeft = outRec2->FirstLeft; + outRec1->IsHole = outRec2->IsHole; + } + outRec2->Pts = 0; + outRec2->BottomPt = 0; + outRec2->FirstLeft = outRec1; + + int OKIdx = e1->OutIdx; + int ObsoleteIdx = e2->OutIdx; + + e1->OutIdx = + Unassigned; // nb: safe because we only get here via AddLocalMaxPoly + e2->OutIdx = Unassigned; + + TEdge *e = m_ActiveEdges; + while (e) { + if (e->OutIdx == ObsoleteIdx) { + e->OutIdx = OKIdx; + e->Side = e1->Side; + break; + } + e = e->NextInAEL; + } + + outRec2->Idx = outRec1->Idx; +} +//------------------------------------------------------------------------------ + +OutPt *Clipper::AddOutPt(TEdge *e, const IntPoint &pt) { + if (e->OutIdx < 0) { + OutRec *outRec = CreateOutRec(); + outRec->IsOpen = (e->WindDelta == 0); + OutPt *newOp = new OutPt; + outRec->Pts = newOp; + newOp->Idx = outRec->Idx; + newOp->Pt = pt; + newOp->Next = newOp; + newOp->Prev = newOp; + if (!outRec->IsOpen) + SetHoleState(e, outRec); + e->OutIdx = outRec->Idx; + return newOp; + } else { + OutRec *outRec = m_PolyOuts[e->OutIdx]; + // OutRec.Pts is the 'Left-most' point & OutRec.Pts.Prev is the 'Right-most' + OutPt *op = outRec->Pts; + + bool ToFront = (e->Side == esLeft); + if (ToFront && (pt == op->Pt)) + return op; + else if (!ToFront && (pt == op->Prev->Pt)) + return op->Prev; + + OutPt *newOp = new OutPt; + newOp->Idx = outRec->Idx; + newOp->Pt = pt; + newOp->Next = op; + newOp->Prev = op->Prev; + newOp->Prev->Next = newOp; + op->Prev = newOp; + if (ToFront) + outRec->Pts = newOp; + return newOp; + } +} +//------------------------------------------------------------------------------ + +OutPt *Clipper::GetLastOutPt(TEdge *e) { + OutRec *outRec = m_PolyOuts[e->OutIdx]; + if (e->Side == esLeft) + return outRec->Pts; + else + return outRec->Pts->Prev; +} +//------------------------------------------------------------------------------ + +void Clipper::ProcessHorizontals() { + TEdge *horzEdge; + while (PopEdgeFromSEL(horzEdge)) + ProcessHorizontal(horzEdge); +} +//------------------------------------------------------------------------------ + +inline bool IsMinima(TEdge *e) { + return e && (e->Prev->NextInLML != e) && (e->Next->NextInLML != e); +} +//------------------------------------------------------------------------------ + +inline bool IsMaxima(TEdge *e, const cInt Y) { + return e && e->Top.Y == Y && !e->NextInLML; +} +//------------------------------------------------------------------------------ + +inline bool IsIntermediate(TEdge *e, const cInt Y) { + return e->Top.Y == Y && e->NextInLML; +} 
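+//------------------------------------------------------------------------------
+// Illustrative usage sketch (comment only, not part of the compiled library):
+// the minima/maxima helpers above and the horizontal-edge processing below are
+// internal to the sweep; callers normally drive them through the public
+// Clipper API defined in this file. Assuming the usual ClipperLib namespace
+// and the Path/Paths typedefs from clipper.hpp, intersecting two closed
+// rectangles would look roughly like:
+//
+//   ClipperLib::Path subj, clip;
+//   subj.push_back(ClipperLib::IntPoint(0, 0));
+//   subj.push_back(ClipperLib::IntPoint(10, 0));
+//   subj.push_back(ClipperLib::IntPoint(10, 10));
+//   subj.push_back(ClipperLib::IntPoint(0, 10));
+//   clip.push_back(ClipperLib::IntPoint(5, 5));
+//   clip.push_back(ClipperLib::IntPoint(15, 5));
+//   clip.push_back(ClipperLib::IntPoint(15, 15));
+//   clip.push_back(ClipperLib::IntPoint(5, 15));
+//
+//   ClipperLib::Clipper c(0);                       // no special init options
+//   c.AddPath(subj, ClipperLib::ptSubject, true);   // closed subject path
+//   c.AddPath(clip, ClipperLib::ptClip, true);      // closed clip path
+//   ClipperLib::Paths solution;
+//   c.Execute(ClipperLib::ctIntersection, solution, ClipperLib::pftNonZero);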
+//------------------------------------------------------------------------------ + +TEdge *GetMaximaPair(TEdge *e) { + if ((e->Next->Top == e->Top) && !e->Next->NextInLML) + return e->Next; + else if ((e->Prev->Top == e->Top) && !e->Prev->NextInLML) + return e->Prev; + else + return 0; +} +//------------------------------------------------------------------------------ + +TEdge *GetMaximaPairEx(TEdge *e) { + // as GetMaximaPair() but returns 0 if MaxPair isn't in AEL (unless it's + // horizontal) + TEdge *result = GetMaximaPair(e); + if (result && + (result->OutIdx == Skip || + (result->NextInAEL == result->PrevInAEL && !IsHorizontal(*result)))) + return 0; + return result; +} +//------------------------------------------------------------------------------ + +void Clipper::SwapPositionsInSEL(TEdge *Edge1, TEdge *Edge2) { + if (!(Edge1->NextInSEL) && !(Edge1->PrevInSEL)) + return; + if (!(Edge2->NextInSEL) && !(Edge2->PrevInSEL)) + return; + + if (Edge1->NextInSEL == Edge2) { + TEdge *Next = Edge2->NextInSEL; + if (Next) + Next->PrevInSEL = Edge1; + TEdge *Prev = Edge1->PrevInSEL; + if (Prev) + Prev->NextInSEL = Edge2; + Edge2->PrevInSEL = Prev; + Edge2->NextInSEL = Edge1; + Edge1->PrevInSEL = Edge2; + Edge1->NextInSEL = Next; + } else if (Edge2->NextInSEL == Edge1) { + TEdge *Next = Edge1->NextInSEL; + if (Next) + Next->PrevInSEL = Edge2; + TEdge *Prev = Edge2->PrevInSEL; + if (Prev) + Prev->NextInSEL = Edge1; + Edge1->PrevInSEL = Prev; + Edge1->NextInSEL = Edge2; + Edge2->PrevInSEL = Edge1; + Edge2->NextInSEL = Next; + } else { + TEdge *Next = Edge1->NextInSEL; + TEdge *Prev = Edge1->PrevInSEL; + Edge1->NextInSEL = Edge2->NextInSEL; + if (Edge1->NextInSEL) + Edge1->NextInSEL->PrevInSEL = Edge1; + Edge1->PrevInSEL = Edge2->PrevInSEL; + if (Edge1->PrevInSEL) + Edge1->PrevInSEL->NextInSEL = Edge1; + Edge2->NextInSEL = Next; + if (Edge2->NextInSEL) + Edge2->NextInSEL->PrevInSEL = Edge2; + Edge2->PrevInSEL = Prev; + if (Edge2->PrevInSEL) + Edge2->PrevInSEL->NextInSEL = Edge2; + } + + if (!Edge1->PrevInSEL) + m_SortedEdges = Edge1; + else if (!Edge2->PrevInSEL) + m_SortedEdges = Edge2; +} +//------------------------------------------------------------------------------ + +TEdge *GetNextInAEL(TEdge *e, Direction dir) { + return dir == dLeftToRight ? e->NextInAEL : e->PrevInAEL; +} +//------------------------------------------------------------------------------ + +void GetHorzDirection(TEdge &HorzEdge, Direction &Dir, cInt &Left, + cInt &Right) { + if (HorzEdge.Bot.X < HorzEdge.Top.X) { + Left = HorzEdge.Bot.X; + Right = HorzEdge.Top.X; + Dir = dLeftToRight; + } else { + Left = HorzEdge.Top.X; + Right = HorzEdge.Bot.X; + Dir = dRightToLeft; + } +} +//------------------------------------------------------------------------ + +/******************************************************************************* + * Notes: Horizontal edges (HEs) at scanline intersections (ie at the Top or * + * Bottom of a scanbeam) are processed as if layered. The order in which HEs * + * are processed doesn't matter. HEs intersect with other HE Bot.Xs only [#] * + * (or they could intersect with Top.Xs only, ie EITHER Bot.Xs OR Top.Xs), * and + *with other non-horizontal edges [*]. Once these intersections are * + * processed, intermediate HEs then 'promote' the Edge above (NextInLML) into * + * the AEL. These 'promoted' edges may in turn intersect [%] with other HEs. 
* + *******************************************************************************/ + +void Clipper::ProcessHorizontal(TEdge *horzEdge) { + Direction dir; + cInt horzLeft, horzRight; + bool IsOpen = (horzEdge->WindDelta == 0); + + GetHorzDirection(*horzEdge, dir, horzLeft, horzRight); + + TEdge *eLastHorz = horzEdge, *eMaxPair = 0; + while (eLastHorz->NextInLML && IsHorizontal(*eLastHorz->NextInLML)) + eLastHorz = eLastHorz->NextInLML; + if (!eLastHorz->NextInLML) + eMaxPair = GetMaximaPair(eLastHorz); + + MaximaList::const_iterator maxIt; + MaximaList::const_reverse_iterator maxRit; + if (m_Maxima.size() > 0) { + // get the first maxima in range (X) ... + if (dir == dLeftToRight) { + maxIt = m_Maxima.begin(); + while (maxIt != m_Maxima.end() && *maxIt <= horzEdge->Bot.X) + ++maxIt; + if (maxIt != m_Maxima.end() && *maxIt >= eLastHorz->Top.X) + maxIt = m_Maxima.end(); + } else { + maxRit = m_Maxima.rbegin(); + while (maxRit != m_Maxima.rend() && *maxRit > horzEdge->Bot.X) + ++maxRit; + if (maxRit != m_Maxima.rend() && *maxRit <= eLastHorz->Top.X) + maxRit = m_Maxima.rend(); + } + } + + OutPt *op1 = 0; + + for (;;) // loop through consec. horizontal edges + { + bool IsLastHorz = (horzEdge == eLastHorz); + TEdge *e = GetNextInAEL(horzEdge, dir); + while (e) { + // this code block inserts extra coords into horizontal edges (in output + // polygons) whereever maxima touch these horizontal edges. This helps + //'simplifying' polygons (ie if the Simplify property is set). + if (m_Maxima.size() > 0) { + if (dir == dLeftToRight) { + while (maxIt != m_Maxima.end() && *maxIt < e->Curr.X) { + if (horzEdge->OutIdx >= 0 && !IsOpen) + AddOutPt(horzEdge, IntPoint(*maxIt, horzEdge->Bot.Y)); + ++maxIt; + } + } else { + while (maxRit != m_Maxima.rend() && *maxRit > e->Curr.X) { + if (horzEdge->OutIdx >= 0 && !IsOpen) + AddOutPt(horzEdge, IntPoint(*maxRit, horzEdge->Bot.Y)); + ++maxRit; + } + } + }; + + if ((dir == dLeftToRight && e->Curr.X > horzRight) || + (dir == dRightToLeft && e->Curr.X < horzLeft)) + break; + + // Also break if we've got to the end of an intermediate horizontal edge + // ... + // nb: Smaller Dx's are to the right of larger Dx's ABOVE the horizontal. + if (e->Curr.X == horzEdge->Top.X && horzEdge->NextInLML && + e->Dx < horzEdge->NextInLML->Dx) + break; + + if (horzEdge->OutIdx >= 0 && !IsOpen) // note: may be done multiple times + { +#ifdef use_xyz + if (dir == dLeftToRight) + SetZ(e->Curr, *horzEdge, *e); + else + SetZ(e->Curr, *e, *horzEdge); +#endif + op1 = AddOutPt(horzEdge, e->Curr); + TEdge *eNextHorz = m_SortedEdges; + while (eNextHorz) { + if (eNextHorz->OutIdx >= 0 && + HorzSegmentsOverlap(horzEdge->Bot.X, horzEdge->Top.X, + eNextHorz->Bot.X, eNextHorz->Top.X)) { + OutPt *op2 = GetLastOutPt(eNextHorz); + AddJoin(op2, op1, eNextHorz->Top); + } + eNextHorz = eNextHorz->NextInSEL; + } + AddGhostJoin(op1, horzEdge->Bot); + } + + // OK, so far we're still in range of the horizontal Edge but make sure + // we're at the last of consec. 
horizontals when matching with eMaxPair + if (e == eMaxPair && IsLastHorz) { + if (horzEdge->OutIdx >= 0) + AddLocalMaxPoly(horzEdge, eMaxPair, horzEdge->Top); + DeleteFromAEL(horzEdge); + DeleteFromAEL(eMaxPair); + return; + } + + if (dir == dLeftToRight) { + IntPoint Pt = IntPoint(e->Curr.X, horzEdge->Curr.Y); + IntersectEdges(horzEdge, e, Pt); + } else { + IntPoint Pt = IntPoint(e->Curr.X, horzEdge->Curr.Y); + IntersectEdges(e, horzEdge, Pt); + } + TEdge *eNext = GetNextInAEL(e, dir); + SwapPositionsInAEL(horzEdge, e); + e = eNext; + } // end while(e) + + // Break out of loop if HorzEdge.NextInLML is not also horizontal ... + if (!horzEdge->NextInLML || !IsHorizontal(*horzEdge->NextInLML)) + break; + + UpdateEdgeIntoAEL(horzEdge); + if (horzEdge->OutIdx >= 0) + AddOutPt(horzEdge, horzEdge->Bot); + GetHorzDirection(*horzEdge, dir, horzLeft, horzRight); + + } // end for (;;) + + if (horzEdge->OutIdx >= 0 && !op1) { + op1 = GetLastOutPt(horzEdge); + TEdge *eNextHorz = m_SortedEdges; + while (eNextHorz) { + if (eNextHorz->OutIdx >= 0 && + HorzSegmentsOverlap(horzEdge->Bot.X, horzEdge->Top.X, + eNextHorz->Bot.X, eNextHorz->Top.X)) { + OutPt *op2 = GetLastOutPt(eNextHorz); + AddJoin(op2, op1, eNextHorz->Top); + } + eNextHorz = eNextHorz->NextInSEL; + } + AddGhostJoin(op1, horzEdge->Top); + } + + if (horzEdge->NextInLML) { + if (horzEdge->OutIdx >= 0) { + op1 = AddOutPt(horzEdge, horzEdge->Top); + UpdateEdgeIntoAEL(horzEdge); + if (horzEdge->WindDelta == 0) + return; + // nb: HorzEdge is no longer horizontal here + TEdge *ePrev = horzEdge->PrevInAEL; + TEdge *eNext = horzEdge->NextInAEL; + if (ePrev && ePrev->Curr.X == horzEdge->Bot.X && + ePrev->Curr.Y == horzEdge->Bot.Y && ePrev->WindDelta != 0 && + (ePrev->OutIdx >= 0 && ePrev->Curr.Y > ePrev->Top.Y && + SlopesEqual(*horzEdge, *ePrev, m_UseFullRange))) { + OutPt *op2 = AddOutPt(ePrev, horzEdge->Bot); + AddJoin(op1, op2, horzEdge->Top); + } else if (eNext && eNext->Curr.X == horzEdge->Bot.X && + eNext->Curr.Y == horzEdge->Bot.Y && eNext->WindDelta != 0 && + eNext->OutIdx >= 0 && eNext->Curr.Y > eNext->Top.Y && + SlopesEqual(*horzEdge, *eNext, m_UseFullRange)) { + OutPt *op2 = AddOutPt(eNext, horzEdge->Bot); + AddJoin(op1, op2, horzEdge->Top); + } + } else + UpdateEdgeIntoAEL(horzEdge); + } else { + if (horzEdge->OutIdx >= 0) + AddOutPt(horzEdge, horzEdge->Top); + DeleteFromAEL(horzEdge); + } +} +//------------------------------------------------------------------------------ + +bool Clipper::ProcessIntersections(const cInt topY) { + if (!m_ActiveEdges) + return true; + try { + BuildIntersectList(topY); + size_t IlSize = m_IntersectList.size(); + if (IlSize == 0) + return true; + if (IlSize == 1 || FixupIntersectionOrder()) + ProcessIntersectList(); + else + return false; + } catch (...) { + m_SortedEdges = 0; + DisposeIntersectNodes(); + throw clipperException("ProcessIntersections error"); + } + m_SortedEdges = 0; + return true; +} +//------------------------------------------------------------------------------ + +void Clipper::DisposeIntersectNodes() { + for (size_t i = 0; i < m_IntersectList.size(); ++i) + delete m_IntersectList[i]; + m_IntersectList.clear(); +} +//------------------------------------------------------------------------------ + +void Clipper::BuildIntersectList(const cInt topY) { + if (!m_ActiveEdges) + return; + + // prepare for sorting ... 
+ TEdge *e = m_ActiveEdges; + m_SortedEdges = e; + while (e) { + e->PrevInSEL = e->PrevInAEL; + e->NextInSEL = e->NextInAEL; + e->Curr.X = TopX(*e, topY); + e = e->NextInAEL; + } + + // bubblesort ... + bool isModified; + do { + isModified = false; + e = m_SortedEdges; + while (e->NextInSEL) { + TEdge *eNext = e->NextInSEL; + IntPoint Pt; + if (e->Curr.X > eNext->Curr.X) { + IntersectPoint(*e, *eNext, Pt); + if (Pt.Y < topY) + Pt = IntPoint(TopX(*e, topY), topY); + IntersectNode *newNode = new IntersectNode; + newNode->Edge1 = e; + newNode->Edge2 = eNext; + newNode->Pt = Pt; + m_IntersectList.push_back(newNode); + + SwapPositionsInSEL(e, eNext); + isModified = true; + } else + e = eNext; + } + if (e->PrevInSEL) + e->PrevInSEL->NextInSEL = 0; + else + break; + } while (isModified); + m_SortedEdges = 0; // important +} +//------------------------------------------------------------------------------ + +void Clipper::ProcessIntersectList() { + for (size_t i = 0; i < m_IntersectList.size(); ++i) { + IntersectNode *iNode = m_IntersectList[i]; + { + IntersectEdges(iNode->Edge1, iNode->Edge2, iNode->Pt); + SwapPositionsInAEL(iNode->Edge1, iNode->Edge2); + } + delete iNode; + } + m_IntersectList.clear(); +} +//------------------------------------------------------------------------------ + +bool IntersectListSort(IntersectNode *node1, IntersectNode *node2) { + return node2->Pt.Y < node1->Pt.Y; +} +//------------------------------------------------------------------------------ + +inline bool EdgesAdjacent(const IntersectNode &inode) { + return (inode.Edge1->NextInSEL == inode.Edge2) || + (inode.Edge1->PrevInSEL == inode.Edge2); +} +//------------------------------------------------------------------------------ + +bool Clipper::FixupIntersectionOrder() { + // pre-condition: intersections are sorted Bottom-most first. + // Now it's crucial that intersections are made only between adjacent edges, + // so to ensure this the order of intersections may need adjusting ... 
+ CopyAELToSEL(); + std::sort(m_IntersectList.begin(), m_IntersectList.end(), IntersectListSort); + size_t cnt = m_IntersectList.size(); + for (size_t i = 0; i < cnt; ++i) { + if (!EdgesAdjacent(*m_IntersectList[i])) { + size_t j = i + 1; + while (j < cnt && !EdgesAdjacent(*m_IntersectList[j])) + j++; + if (j == cnt) + return false; + std::swap(m_IntersectList[i], m_IntersectList[j]); + } + SwapPositionsInSEL(m_IntersectList[i]->Edge1, m_IntersectList[i]->Edge2); + } + return true; +} +//------------------------------------------------------------------------------ + +void Clipper::DoMaxima(TEdge *e) { + TEdge *eMaxPair = GetMaximaPairEx(e); + if (!eMaxPair) { + if (e->OutIdx >= 0) + AddOutPt(e, e->Top); + DeleteFromAEL(e); + return; + } + + TEdge *eNext = e->NextInAEL; + while (eNext && eNext != eMaxPair) { + IntersectEdges(e, eNext, e->Top); + SwapPositionsInAEL(e, eNext); + eNext = e->NextInAEL; + } + + if (e->OutIdx == Unassigned && eMaxPair->OutIdx == Unassigned) { + DeleteFromAEL(e); + DeleteFromAEL(eMaxPair); + } else if (e->OutIdx >= 0 && eMaxPair->OutIdx >= 0) { + if (e->OutIdx >= 0) + AddLocalMaxPoly(e, eMaxPair, e->Top); + DeleteFromAEL(e); + DeleteFromAEL(eMaxPair); + } +#ifdef use_lines + else if (e->WindDelta == 0) { + if (e->OutIdx >= 0) { + AddOutPt(e, e->Top); + e->OutIdx = Unassigned; + } + DeleteFromAEL(e); + + if (eMaxPair->OutIdx >= 0) { + AddOutPt(eMaxPair, e->Top); + eMaxPair->OutIdx = Unassigned; + } + DeleteFromAEL(eMaxPair); + } +#endif + else + throw clipperException("DoMaxima error"); +} +//------------------------------------------------------------------------------ + +void Clipper::ProcessEdgesAtTopOfScanbeam(const cInt topY) { + TEdge *e = m_ActiveEdges; + while (e) { + // 1. process maxima, treating them as if they're 'bent' horizontal edges, + // but exclude maxima with horizontal edges. nb: e can't be a horizontal. + bool IsMaximaEdge = IsMaxima(e, topY); + + if (IsMaximaEdge) { + TEdge *eMaxPair = GetMaximaPairEx(e); + IsMaximaEdge = (!eMaxPair || !IsHorizontal(*eMaxPair)); + } + + if (IsMaximaEdge) { + if (m_StrictSimple) + m_Maxima.push_back(e->Top.X); + TEdge *ePrev = e->PrevInAEL; + DoMaxima(e); + if (!ePrev) + e = m_ActiveEdges; + else + e = ePrev->NextInAEL; + } else { + // 2. promote horizontal edges, otherwise update Curr.X and Curr.Y ... + if (IsIntermediate(e, topY) && IsHorizontal(*e->NextInLML)) { + UpdateEdgeIntoAEL(e); + if (e->OutIdx >= 0) + AddOutPt(e, e->Bot); + AddEdgeToSEL(e); + } else { + e->Curr.X = TopX(*e, topY); + e->Curr.Y = topY; +#ifdef use_xyz + e->Curr.Z = + topY == e->Top.Y ? e->Top.Z : (topY == e->Bot.Y ? e->Bot.Z : 0); +#endif + } + + // When StrictlySimple and 'e' is being touched by another edge, then + // make sure both edges have a vertex here ... + if (m_StrictSimple) { + TEdge *ePrev = e->PrevInAEL; + if ((e->OutIdx >= 0) && (e->WindDelta != 0) && ePrev && + (ePrev->OutIdx >= 0) && (ePrev->Curr.X == e->Curr.X) && + (ePrev->WindDelta != 0)) { + IntPoint pt = e->Curr; +#ifdef use_xyz + SetZ(pt, *ePrev, *e); +#endif + OutPt *op = AddOutPt(ePrev, pt); + OutPt *op2 = AddOutPt(e, pt); + AddJoin(op, op2, pt); // StrictlySimple (type-3) join + } + } + + e = e->NextInAEL; + } + } + + // 3. Process horizontals at the Top of the scanbeam ... + m_Maxima.sort(); + ProcessHorizontals(); + m_Maxima.clear(); + + // 4. Promote intermediate vertices ... 
+ e = m_ActiveEdges; + while (e) { + if (IsIntermediate(e, topY)) { + OutPt *op = 0; + if (e->OutIdx >= 0) + op = AddOutPt(e, e->Top); + UpdateEdgeIntoAEL(e); + + // if output polygons share an edge, they'll need joining later ... + TEdge *ePrev = e->PrevInAEL; + TEdge *eNext = e->NextInAEL; + if (ePrev && ePrev->Curr.X == e->Bot.X && ePrev->Curr.Y == e->Bot.Y && + op && ePrev->OutIdx >= 0 && ePrev->Curr.Y > ePrev->Top.Y && + SlopesEqual(e->Curr, e->Top, ePrev->Curr, ePrev->Top, + m_UseFullRange) && + (e->WindDelta != 0) && (ePrev->WindDelta != 0)) { + OutPt *op2 = AddOutPt(ePrev, e->Bot); + AddJoin(op, op2, e->Top); + } else if (eNext && eNext->Curr.X == e->Bot.X && + eNext->Curr.Y == e->Bot.Y && op && eNext->OutIdx >= 0 && + eNext->Curr.Y > eNext->Top.Y && + SlopesEqual(e->Curr, e->Top, eNext->Curr, eNext->Top, + m_UseFullRange) && + (e->WindDelta != 0) && (eNext->WindDelta != 0)) { + OutPt *op2 = AddOutPt(eNext, e->Bot); + AddJoin(op, op2, e->Top); + } + } + e = e->NextInAEL; + } +} +//------------------------------------------------------------------------------ + +void Clipper::FixupOutPolyline(OutRec &outrec) { + OutPt *pp = outrec.Pts; + OutPt *lastPP = pp->Prev; + while (pp != lastPP) { + pp = pp->Next; + if (pp->Pt == pp->Prev->Pt) { + if (pp == lastPP) + lastPP = pp->Prev; + OutPt *tmpPP = pp->Prev; + tmpPP->Next = pp->Next; + pp->Next->Prev = tmpPP; + delete pp; + pp = tmpPP; + } + } + + if (pp == pp->Prev) { + DisposeOutPts(pp); + outrec.Pts = 0; + return; + } +} +//------------------------------------------------------------------------------ + +void Clipper::FixupOutPolygon(OutRec &outrec) { + // FixupOutPolygon() - removes duplicate points and simplifies consecutive + // parallel edges by removing the middle vertex. + OutPt *lastOK = 0; + outrec.BottomPt = 0; + OutPt *pp = outrec.Pts; + bool preserveCol = m_PreserveCollinear || m_StrictSimple; + + for (;;) { + if (pp->Prev == pp || pp->Prev == pp->Next) { + DisposeOutPts(pp); + outrec.Pts = 0; + return; + } + + // test for duplicate points and collinear edges ... + if ((pp->Pt == pp->Next->Pt) || (pp->Pt == pp->Prev->Pt) || + (SlopesEqual(pp->Prev->Pt, pp->Pt, pp->Next->Pt, m_UseFullRange) && + (!preserveCol || + !Pt2IsBetweenPt1AndPt3(pp->Prev->Pt, pp->Pt, pp->Next->Pt)))) { + lastOK = 0; + OutPt *tmp = pp; + pp->Prev->Next = pp->Next; + pp->Next->Prev = pp->Prev; + pp = pp->Prev; + delete tmp; + } else if (pp == lastOK) + break; + else { + if (!lastOK) + lastOK = pp; + pp = pp->Next; + } + } + outrec.Pts = pp; +} +//------------------------------------------------------------------------------ + +int PointCount(OutPt *Pts) { + if (!Pts) + return 0; + int result = 0; + OutPt *p = Pts; + do { + result++; + p = p->Next; + } while (p != Pts); + return result; +} +//------------------------------------------------------------------------------ + +void Clipper::BuildResult(Paths &polys) { + polys.reserve(m_PolyOuts.size()); + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + if (!m_PolyOuts[i]->Pts) + continue; + Path pg; + OutPt *p = m_PolyOuts[i]->Pts->Prev; + int cnt = PointCount(p); + if (cnt < 2) + continue; + pg.reserve(cnt); + for (int i = 0; i < cnt; ++i) { + pg.push_back(p->Pt); + p = p->Prev; + } + polys.push_back(pg); + } +} +//------------------------------------------------------------------------------ + +void Clipper::BuildResult2(PolyTree &polytree) { + polytree.Clear(); + polytree.AllNodes.reserve(m_PolyOuts.size()); + // add each output polygon/contour to polytree ... 
+ for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); i++) { + OutRec *outRec = m_PolyOuts[i]; + int cnt = PointCount(outRec->Pts); + if ((outRec->IsOpen && cnt < 2) || (!outRec->IsOpen && cnt < 3)) + continue; + FixHoleLinkage(*outRec); + PolyNode *pn = new PolyNode(); + // nb: polytree takes ownership of all the PolyNodes + polytree.AllNodes.push_back(pn); + outRec->PolyNd = pn; + pn->Parent = 0; + pn->Index = 0; + pn->Contour.reserve(cnt); + OutPt *op = outRec->Pts->Prev; + for (int j = 0; j < cnt; j++) { + pn->Contour.push_back(op->Pt); + op = op->Prev; + } + } + + // fixup PolyNode links etc ... + polytree.Childs.reserve(m_PolyOuts.size()); + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); i++) { + OutRec *outRec = m_PolyOuts[i]; + if (!outRec->PolyNd) + continue; + if (outRec->IsOpen) { + outRec->PolyNd->m_IsOpen = true; + polytree.AddChild(*outRec->PolyNd); + } else if (outRec->FirstLeft && outRec->FirstLeft->PolyNd) + outRec->FirstLeft->PolyNd->AddChild(*outRec->PolyNd); + else + polytree.AddChild(*outRec->PolyNd); + } +} +//------------------------------------------------------------------------------ + +void SwapIntersectNodes(IntersectNode &int1, IntersectNode &int2) { + // just swap the contents (because fIntersectNodes is a single-linked-list) + IntersectNode inode = int1; // gets a copy of Int1 + int1.Edge1 = int2.Edge1; + int1.Edge2 = int2.Edge2; + int1.Pt = int2.Pt; + int2.Edge1 = inode.Edge1; + int2.Edge2 = inode.Edge2; + int2.Pt = inode.Pt; +} +//------------------------------------------------------------------------------ + +inline bool E2InsertsBeforeE1(TEdge &e1, TEdge &e2) { + if (e2.Curr.X == e1.Curr.X) { + if (e2.Top.Y > e1.Top.Y) + return e2.Top.X < TopX(e1, e2.Top.Y); + else + return e1.Top.X > TopX(e2, e1.Top.Y); + } else + return e2.Curr.X < e1.Curr.X; +} +//------------------------------------------------------------------------------ + +bool GetOverlap(const cInt a1, const cInt a2, const cInt b1, const cInt b2, + cInt &Left, cInt &Right) { + if (a1 < a2) { + if (b1 < b2) { + Left = std::max(a1, b1); + Right = std::min(a2, b2); + } else { + Left = std::max(a1, b2); + Right = std::min(a2, b1); + } + } else { + if (b1 < b2) { + Left = std::max(a2, b1); + Right = std::min(a1, b2); + } else { + Left = std::max(a2, b2); + Right = std::min(a1, b1); + } + } + return Left < Right; +} +//------------------------------------------------------------------------------ + +inline void UpdateOutPtIdxs(OutRec &outrec) { + OutPt *op = outrec.Pts; + do { + op->Idx = outrec.Idx; + op = op->Prev; + } while (op != outrec.Pts); +} +//------------------------------------------------------------------------------ + +void Clipper::InsertEdgeIntoAEL(TEdge *edge, TEdge *startEdge) { + if (!m_ActiveEdges) { + edge->PrevInAEL = 0; + edge->NextInAEL = 0; + m_ActiveEdges = edge; + } else if (!startEdge && E2InsertsBeforeE1(*m_ActiveEdges, *edge)) { + edge->PrevInAEL = 0; + edge->NextInAEL = m_ActiveEdges; + m_ActiveEdges->PrevInAEL = edge; + m_ActiveEdges = edge; + } else { + if (!startEdge) + startEdge = m_ActiveEdges; + while (startEdge->NextInAEL && + !E2InsertsBeforeE1(*startEdge->NextInAEL, *edge)) + startEdge = startEdge->NextInAEL; + edge->NextInAEL = startEdge->NextInAEL; + if (startEdge->NextInAEL) + startEdge->NextInAEL->PrevInAEL = edge; + edge->PrevInAEL = startEdge; + startEdge->NextInAEL = edge; + } +} +//---------------------------------------------------------------------- + +OutPt *DupOutPt(OutPt *outPt, bool InsertAfter) { + OutPt *result = new OutPt; + 
result->Pt = outPt->Pt; + result->Idx = outPt->Idx; + if (InsertAfter) { + result->Next = outPt->Next; + result->Prev = outPt; + outPt->Next->Prev = result; + outPt->Next = result; + } else { + result->Prev = outPt->Prev; + result->Next = outPt; + outPt->Prev->Next = result; + outPt->Prev = result; + } + return result; +} +//------------------------------------------------------------------------------ + +bool JoinHorz(OutPt *op1, OutPt *op1b, OutPt *op2, OutPt *op2b, + const IntPoint Pt, bool DiscardLeft) { + Direction Dir1 = (op1->Pt.X > op1b->Pt.X ? dRightToLeft : dLeftToRight); + Direction Dir2 = (op2->Pt.X > op2b->Pt.X ? dRightToLeft : dLeftToRight); + if (Dir1 == Dir2) + return false; + + // When DiscardLeft, we want Op1b to be on the Left of Op1, otherwise we + // want Op1b to be on the Right. (And likewise with Op2 and Op2b.) + // So, to facilitate this while inserting Op1b and Op2b ... + // when DiscardLeft, make sure we're AT or RIGHT of Pt before adding Op1b, + // otherwise make sure we're AT or LEFT of Pt. (Likewise with Op2b.) + if (Dir1 == dLeftToRight) { + while (op1->Next->Pt.X <= Pt.X && op1->Next->Pt.X >= op1->Pt.X && + op1->Next->Pt.Y == Pt.Y) + op1 = op1->Next; + if (DiscardLeft && (op1->Pt.X != Pt.X)) + op1 = op1->Next; + op1b = DupOutPt(op1, !DiscardLeft); + if (op1b->Pt != Pt) { + op1 = op1b; + op1->Pt = Pt; + op1b = DupOutPt(op1, !DiscardLeft); + } + } else { + while (op1->Next->Pt.X >= Pt.X && op1->Next->Pt.X <= op1->Pt.X && + op1->Next->Pt.Y == Pt.Y) + op1 = op1->Next; + if (!DiscardLeft && (op1->Pt.X != Pt.X)) + op1 = op1->Next; + op1b = DupOutPt(op1, DiscardLeft); + if (op1b->Pt != Pt) { + op1 = op1b; + op1->Pt = Pt; + op1b = DupOutPt(op1, DiscardLeft); + } + } + + if (Dir2 == dLeftToRight) { + while (op2->Next->Pt.X <= Pt.X && op2->Next->Pt.X >= op2->Pt.X && + op2->Next->Pt.Y == Pt.Y) + op2 = op2->Next; + if (DiscardLeft && (op2->Pt.X != Pt.X)) + op2 = op2->Next; + op2b = DupOutPt(op2, !DiscardLeft); + if (op2b->Pt != Pt) { + op2 = op2b; + op2->Pt = Pt; + op2b = DupOutPt(op2, !DiscardLeft); + }; + } else { + while (op2->Next->Pt.X >= Pt.X && op2->Next->Pt.X <= op2->Pt.X && + op2->Next->Pt.Y == Pt.Y) + op2 = op2->Next; + if (!DiscardLeft && (op2->Pt.X != Pt.X)) + op2 = op2->Next; + op2b = DupOutPt(op2, DiscardLeft); + if (op2b->Pt != Pt) { + op2 = op2b; + op2->Pt = Pt; + op2b = DupOutPt(op2, DiscardLeft); + }; + }; + + if ((Dir1 == dLeftToRight) == DiscardLeft) { + op1->Prev = op2; + op2->Next = op1; + op1b->Next = op2b; + op2b->Prev = op1b; + } else { + op1->Next = op2; + op2->Prev = op1; + op1b->Prev = op2b; + op2b->Next = op1b; + } + return true; +} +//------------------------------------------------------------------------------ + +bool Clipper::JoinPoints(Join *j, OutRec *outRec1, OutRec *outRec2) { + OutPt *op1 = j->OutPt1, *op1b; + OutPt *op2 = j->OutPt2, *op2b; + + // There are 3 kinds of joins for output polygons ... + // 1. Horizontal joins where Join.OutPt1 & Join.OutPt2 are vertices anywhere + // along (horizontal) collinear edges (& Join.OffPt is on the same + // horizontal). + // 2. Non-horizontal joins where Join.OutPt1 & Join.OutPt2 are at the same + // location at the Bottom of the overlapping segment (& Join.OffPt is above). + // 3. StrictSimple joins where edges touch but are not collinear and where + // Join.OutPt1, Join.OutPt2 & Join.OffPt all share the same point. + bool isHorizontal = (j->OutPt1->Pt.Y == j->OffPt.Y); + + if (isHorizontal && (j->OffPt == j->OutPt1->Pt) && + (j->OffPt == j->OutPt2->Pt)) { + // Strictly Simple join ... 
+ if (outRec1 != outRec2) + return false; + op1b = j->OutPt1->Next; + while (op1b != op1 && (op1b->Pt == j->OffPt)) + op1b = op1b->Next; + bool reverse1 = (op1b->Pt.Y > j->OffPt.Y); + op2b = j->OutPt2->Next; + while (op2b != op2 && (op2b->Pt == j->OffPt)) + op2b = op2b->Next; + bool reverse2 = (op2b->Pt.Y > j->OffPt.Y); + if (reverse1 == reverse2) + return false; + if (reverse1) { + op1b = DupOutPt(op1, false); + op2b = DupOutPt(op2, true); + op1->Prev = op2; + op2->Next = op1; + op1b->Next = op2b; + op2b->Prev = op1b; + j->OutPt1 = op1; + j->OutPt2 = op1b; + return true; + } else { + op1b = DupOutPt(op1, true); + op2b = DupOutPt(op2, false); + op1->Next = op2; + op2->Prev = op1; + op1b->Prev = op2b; + op2b->Next = op1b; + j->OutPt1 = op1; + j->OutPt2 = op1b; + return true; + } + } else if (isHorizontal) { + // treat horizontal joins differently to non-horizontal joins since with + // them we're not yet sure where the overlapping is. OutPt1.Pt & OutPt2.Pt + // may be anywhere along the horizontal edge. + op1b = op1; + while (op1->Prev->Pt.Y == op1->Pt.Y && op1->Prev != op1b && + op1->Prev != op2) + op1 = op1->Prev; + while (op1b->Next->Pt.Y == op1b->Pt.Y && op1b->Next != op1 && + op1b->Next != op2) + op1b = op1b->Next; + if (op1b->Next == op1 || op1b->Next == op2) + return false; // a flat 'polygon' + + op2b = op2; + while (op2->Prev->Pt.Y == op2->Pt.Y && op2->Prev != op2b && + op2->Prev != op1b) + op2 = op2->Prev; + while (op2b->Next->Pt.Y == op2b->Pt.Y && op2b->Next != op2 && + op2b->Next != op1) + op2b = op2b->Next; + if (op2b->Next == op2 || op2b->Next == op1) + return false; // a flat 'polygon' + + cInt Left, Right; + // Op1 --> Op1b & Op2 --> Op2b are the extremites of the horizontal edges + if (!GetOverlap(op1->Pt.X, op1b->Pt.X, op2->Pt.X, op2b->Pt.X, Left, Right)) + return false; + + // DiscardLeftSide: when overlapping edges are joined, a spike will created + // which needs to be cleaned up. However, we don't want Op1 or Op2 caught up + // on the discard Side as either may still be needed for other joins ... + IntPoint Pt; + bool DiscardLeftSide; + if (op1->Pt.X >= Left && op1->Pt.X <= Right) { + Pt = op1->Pt; + DiscardLeftSide = (op1->Pt.X > op1b->Pt.X); + } else if (op2->Pt.X >= Left && op2->Pt.X <= Right) { + Pt = op2->Pt; + DiscardLeftSide = (op2->Pt.X > op2b->Pt.X); + } else if (op1b->Pt.X >= Left && op1b->Pt.X <= Right) { + Pt = op1b->Pt; + DiscardLeftSide = op1b->Pt.X > op1->Pt.X; + } else { + Pt = op2b->Pt; + DiscardLeftSide = (op2b->Pt.X > op2->Pt.X); + } + j->OutPt1 = op1; + j->OutPt2 = op2; + return JoinHorz(op1, op1b, op2, op2b, Pt, DiscardLeftSide); + } else { + // nb: For non-horizontal joins ... + // 1. Jr.OutPt1.Pt.Y == Jr.OutPt2.Pt.Y + // 2. Jr.OutPt1.Pt > Jr.OffPt.Y + + // make sure the polygons are correctly oriented ... 
+ op1b = op1->Next; + while ((op1b->Pt == op1->Pt) && (op1b != op1)) + op1b = op1b->Next; + bool Reverse1 = ((op1b->Pt.Y > op1->Pt.Y) || + !SlopesEqual(op1->Pt, op1b->Pt, j->OffPt, m_UseFullRange)); + if (Reverse1) { + op1b = op1->Prev; + while ((op1b->Pt == op1->Pt) && (op1b != op1)) + op1b = op1b->Prev; + if ((op1b->Pt.Y > op1->Pt.Y) || + !SlopesEqual(op1->Pt, op1b->Pt, j->OffPt, m_UseFullRange)) + return false; + }; + op2b = op2->Next; + while ((op2b->Pt == op2->Pt) && (op2b != op2)) + op2b = op2b->Next; + bool Reverse2 = ((op2b->Pt.Y > op2->Pt.Y) || + !SlopesEqual(op2->Pt, op2b->Pt, j->OffPt, m_UseFullRange)); + if (Reverse2) { + op2b = op2->Prev; + while ((op2b->Pt == op2->Pt) && (op2b != op2)) + op2b = op2b->Prev; + if ((op2b->Pt.Y > op2->Pt.Y) || + !SlopesEqual(op2->Pt, op2b->Pt, j->OffPt, m_UseFullRange)) + return false; + } + + if ((op1b == op1) || (op2b == op2) || (op1b == op2b) || + ((outRec1 == outRec2) && (Reverse1 == Reverse2))) + return false; + + if (Reverse1) { + op1b = DupOutPt(op1, false); + op2b = DupOutPt(op2, true); + op1->Prev = op2; + op2->Next = op1; + op1b->Next = op2b; + op2b->Prev = op1b; + j->OutPt1 = op1; + j->OutPt2 = op1b; + return true; + } else { + op1b = DupOutPt(op1, true); + op2b = DupOutPt(op2, false); + op1->Next = op2; + op2->Prev = op1; + op1b->Prev = op2b; + op2b->Next = op1b; + j->OutPt1 = op1; + j->OutPt2 = op1b; + return true; + } + } +} +//---------------------------------------------------------------------- + +static OutRec *ParseFirstLeft(OutRec *FirstLeft) { + while (FirstLeft && !FirstLeft->Pts) + FirstLeft = FirstLeft->FirstLeft; + return FirstLeft; +} +//------------------------------------------------------------------------------ + +void Clipper::FixupFirstLefts1(OutRec *OldOutRec, OutRec *NewOutRec) { + // tests if NewOutRec contains the polygon before reassigning FirstLeft + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + OutRec *outRec = m_PolyOuts[i]; + OutRec *firstLeft = ParseFirstLeft(outRec->FirstLeft); + if (outRec->Pts && firstLeft == OldOutRec) { + if (Poly2ContainsPoly1(outRec->Pts, NewOutRec->Pts)) + outRec->FirstLeft = NewOutRec; + } + } +} +//---------------------------------------------------------------------- + +void Clipper::FixupFirstLefts2(OutRec *InnerOutRec, OutRec *OuterOutRec) { + // A polygon has split into two such that one is now the inner of the other. + // It's possible that these polygons now wrap around other polygons, so check + // every polygon that's also contained by OuterOutRec's FirstLeft container + //(including 0) to see if they've become inner to the new inner polygon ... 
+ OutRec *orfl = OuterOutRec->FirstLeft; + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + OutRec *outRec = m_PolyOuts[i]; + + if (!outRec->Pts || outRec == OuterOutRec || outRec == InnerOutRec) + continue; + OutRec *firstLeft = ParseFirstLeft(outRec->FirstLeft); + if (firstLeft != orfl && firstLeft != InnerOutRec && + firstLeft != OuterOutRec) + continue; + if (Poly2ContainsPoly1(outRec->Pts, InnerOutRec->Pts)) + outRec->FirstLeft = InnerOutRec; + else if (Poly2ContainsPoly1(outRec->Pts, OuterOutRec->Pts)) + outRec->FirstLeft = OuterOutRec; + else if (outRec->FirstLeft == InnerOutRec || + outRec->FirstLeft == OuterOutRec) + outRec->FirstLeft = orfl; + } +} +//---------------------------------------------------------------------- +void Clipper::FixupFirstLefts3(OutRec *OldOutRec, OutRec *NewOutRec) { + // reassigns FirstLeft WITHOUT testing if NewOutRec contains the polygon + for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) { + OutRec *outRec = m_PolyOuts[i]; + OutRec *firstLeft = ParseFirstLeft(outRec->FirstLeft); + if (outRec->Pts && firstLeft == OldOutRec) + outRec->FirstLeft = NewOutRec; + } +} +//---------------------------------------------------------------------- + +void Clipper::JoinCommonEdges() { + for (JoinList::size_type i = 0; i < m_Joins.size(); i++) { + Join *join = m_Joins[i]; + + OutRec *outRec1 = GetOutRec(join->OutPt1->Idx); + OutRec *outRec2 = GetOutRec(join->OutPt2->Idx); + + if (!outRec1->Pts || !outRec2->Pts) + continue; + if (outRec1->IsOpen || outRec2->IsOpen) + continue; + + // get the polygon fragment with the correct hole state (FirstLeft) + // before calling JoinPoints() ... + OutRec *holeStateRec; + if (outRec1 == outRec2) + holeStateRec = outRec1; + else if (OutRec1RightOfOutRec2(outRec1, outRec2)) + holeStateRec = outRec2; + else if (OutRec1RightOfOutRec2(outRec2, outRec1)) + holeStateRec = outRec1; + else + holeStateRec = GetLowermostRec(outRec1, outRec2); + + if (!JoinPoints(join, outRec1, outRec2)) + continue; + + if (outRec1 == outRec2) { + // instead of joining two polygons, we've just created a new one by + // splitting one polygon into two. + outRec1->Pts = join->OutPt1; + outRec1->BottomPt = 0; + outRec2 = CreateOutRec(); + outRec2->Pts = join->OutPt2; + + // update all OutRec2.Pts Idx's ... + UpdateOutPtIdxs(*outRec2); + + if (Poly2ContainsPoly1(outRec2->Pts, outRec1->Pts)) { + // outRec1 contains outRec2 ... + outRec2->IsHole = !outRec1->IsHole; + outRec2->FirstLeft = outRec1; + + if (m_UsingPolyTree) + FixupFirstLefts2(outRec2, outRec1); + + if ((outRec2->IsHole ^ m_ReverseOutput) == (Area(*outRec2) > 0)) + ReversePolyPtLinks(outRec2->Pts); + + } else if (Poly2ContainsPoly1(outRec1->Pts, outRec2->Pts)) { + // outRec2 contains outRec1 ... + outRec2->IsHole = outRec1->IsHole; + outRec1->IsHole = !outRec2->IsHole; + outRec2->FirstLeft = outRec1->FirstLeft; + outRec1->FirstLeft = outRec2; + + if (m_UsingPolyTree) + FixupFirstLefts2(outRec1, outRec2); + + if ((outRec1->IsHole ^ m_ReverseOutput) == (Area(*outRec1) > 0)) + ReversePolyPtLinks(outRec1->Pts); + } else { + // the 2 polygons are completely separate ... + outRec2->IsHole = outRec1->IsHole; + outRec2->FirstLeft = outRec1->FirstLeft; + + // fixup FirstLeft pointers that may need reassigning to OutRec2 + if (m_UsingPolyTree) + FixupFirstLefts1(outRec1, outRec2); + } + + } else { + // joined 2 polygons together ... 
+ + outRec2->Pts = 0; + outRec2->BottomPt = 0; + outRec2->Idx = outRec1->Idx; + + outRec1->IsHole = holeStateRec->IsHole; + if (holeStateRec == outRec2) + outRec1->FirstLeft = outRec2->FirstLeft; + outRec2->FirstLeft = outRec1; + + if (m_UsingPolyTree) + FixupFirstLefts3(outRec2, outRec1); + } + } +} + +//------------------------------------------------------------------------------ +// ClipperOffset support functions ... +//------------------------------------------------------------------------------ + +DoublePoint GetUnitNormal(const IntPoint &pt1, const IntPoint &pt2) { + if (pt2.X == pt1.X && pt2.Y == pt1.Y) + return DoublePoint(0, 0); + + double Dx = (double)(pt2.X - pt1.X); + double dy = (double)(pt2.Y - pt1.Y); + double f = 1 * 1.0 / std::sqrt(Dx * Dx + dy * dy); + Dx *= f; + dy *= f; + return DoublePoint(dy, -Dx); +} + +//------------------------------------------------------------------------------ +// ClipperOffset class +//------------------------------------------------------------------------------ + +ClipperOffset::ClipperOffset(double miterLimit, double arcTolerance) { + this->MiterLimit = miterLimit; + this->ArcTolerance = arcTolerance; + m_lowest.X = -1; +} +//------------------------------------------------------------------------------ + +ClipperOffset::~ClipperOffset() { Clear(); } +//------------------------------------------------------------------------------ + +void ClipperOffset::Clear() { + for (int i = 0; i < m_polyNodes.ChildCount(); ++i) + delete m_polyNodes.Childs[i]; + m_polyNodes.Childs.clear(); + m_lowest.X = -1; +} +//------------------------------------------------------------------------------ + +void ClipperOffset::AddPath(const Path &path, JoinType joinType, + EndType endType) { + int highI = (int)path.size() - 1; + if (highI < 0) + return; + PolyNode *newNode = new PolyNode(); + newNode->m_jointype = joinType; + newNode->m_endtype = endType; + + // strip duplicate points from path and also get index to the lowest point ... 
+ if (endType == etClosedLine || endType == etClosedPolygon) + while (highI > 0 && path[0] == path[highI]) + highI--; + newNode->Contour.reserve(highI + 1); + newNode->Contour.push_back(path[0]); + int j = 0, k = 0; + for (int i = 1; i <= highI; i++) + if (newNode->Contour[j] != path[i]) { + j++; + newNode->Contour.push_back(path[i]); + if (path[i].Y > newNode->Contour[k].Y || + (path[i].Y == newNode->Contour[k].Y && + path[i].X < newNode->Contour[k].X)) + k = j; + } + if (endType == etClosedPolygon && j < 2) { + delete newNode; + return; + } + m_polyNodes.AddChild(*newNode); + + // if this path's lowest pt is lower than all the others then update m_lowest + if (endType != etClosedPolygon) + return; + if (m_lowest.X < 0) + m_lowest = IntPoint(m_polyNodes.ChildCount() - 1, k); + else { + IntPoint ip = m_polyNodes.Childs[(int)m_lowest.X]->Contour[(int)m_lowest.Y]; + if (newNode->Contour[k].Y > ip.Y || + (newNode->Contour[k].Y == ip.Y && newNode->Contour[k].X < ip.X)) + m_lowest = IntPoint(m_polyNodes.ChildCount() - 1, k); + } +} +//------------------------------------------------------------------------------ + +void ClipperOffset::AddPaths(const Paths &paths, JoinType joinType, + EndType endType) { + for (Paths::size_type i = 0; i < paths.size(); ++i) + AddPath(paths[i], joinType, endType); +} +//------------------------------------------------------------------------------ + +void ClipperOffset::FixOrientations() { + // fixup orientations of all closed paths if the orientation of the + // closed path with the lowermost vertex is wrong ... + if (m_lowest.X >= 0 && + !Orientation(m_polyNodes.Childs[(int)m_lowest.X]->Contour)) { + for (int i = 0; i < m_polyNodes.ChildCount(); ++i) { + PolyNode &node = *m_polyNodes.Childs[i]; + if (node.m_endtype == etClosedPolygon || + (node.m_endtype == etClosedLine && Orientation(node.Contour))) + ReversePath(node.Contour); + } + } else { + for (int i = 0; i < m_polyNodes.ChildCount(); ++i) { + PolyNode &node = *m_polyNodes.Childs[i]; + if (node.m_endtype == etClosedLine && !Orientation(node.Contour)) + ReversePath(node.Contour); + } + } +} +//------------------------------------------------------------------------------ + +void ClipperOffset::Execute(Paths &solution, double delta) { + solution.clear(); + FixOrientations(); + DoOffset(delta); + + // now clean up 'corners' ... + Clipper clpr; + clpr.AddPaths(m_destPolys, ptSubject, true); + if (delta > 0) { + clpr.Execute(ctUnion, solution, pftPositive, pftPositive); + } else { + IntRect r = clpr.GetBounds(); + Path outer(4); + outer[0] = IntPoint(r.left - 10, r.bottom + 10); + outer[1] = IntPoint(r.right + 10, r.bottom + 10); + outer[2] = IntPoint(r.right + 10, r.top - 10); + outer[3] = IntPoint(r.left - 10, r.top - 10); + + clpr.AddPath(outer, ptSubject, true); + clpr.ReverseSolution(true); + clpr.Execute(ctUnion, solution, pftNegative, pftNegative); + if (solution.size() > 0) + solution.erase(solution.begin()); + } +} +//------------------------------------------------------------------------------ + +void ClipperOffset::Execute(PolyTree &solution, double delta) { + solution.Clear(); + FixOrientations(); + DoOffset(delta); + + // now clean up 'corners' ... 
+ Clipper clpr; + clpr.AddPaths(m_destPolys, ptSubject, true); + if (delta > 0) { + clpr.Execute(ctUnion, solution, pftPositive, pftPositive); + } else { + IntRect r = clpr.GetBounds(); + Path outer(4); + outer[0] = IntPoint(r.left - 10, r.bottom + 10); + outer[1] = IntPoint(r.right + 10, r.bottom + 10); + outer[2] = IntPoint(r.right + 10, r.top - 10); + outer[3] = IntPoint(r.left - 10, r.top - 10); + + clpr.AddPath(outer, ptSubject, true); + clpr.ReverseSolution(true); + clpr.Execute(ctUnion, solution, pftNegative, pftNegative); + // remove the outer PolyNode rectangle ... + if (solution.ChildCount() == 1 && solution.Childs[0]->ChildCount() > 0) { + PolyNode *outerNode = solution.Childs[0]; + solution.Childs.reserve(outerNode->ChildCount()); + solution.Childs[0] = outerNode->Childs[0]; + solution.Childs[0]->Parent = outerNode->Parent; + for (int i = 1; i < outerNode->ChildCount(); ++i) + solution.AddChild(*outerNode->Childs[i]); + } else + solution.Clear(); + } +} +//------------------------------------------------------------------------------ + +void ClipperOffset::DoOffset(double delta) { + m_destPolys.clear(); + m_delta = delta; + + // if Zero offset, just copy any CLOSED polygons to m_p and return ... + if (NEAR_ZERO(delta)) { + m_destPolys.reserve(m_polyNodes.ChildCount()); + for (int i = 0; i < m_polyNodes.ChildCount(); i++) { + PolyNode &node = *m_polyNodes.Childs[i]; + if (node.m_endtype == etClosedPolygon) + m_destPolys.push_back(node.Contour); + } + return; + } + + // see offset_triginometry3.svg in the documentation folder ... + if (MiterLimit > 2) + m_miterLim = 2 / (MiterLimit * MiterLimit); + else + m_miterLim = 0.5; + + double y; + if (ArcTolerance <= 0.0) + y = def_arc_tolerance; + else if (ArcTolerance > std::fabs(delta) * def_arc_tolerance) + y = std::fabs(delta) * def_arc_tolerance; + else + y = ArcTolerance; + // see offset_triginometry2.svg in the documentation folder ... + double steps = pi / std::acos(1 - y / std::fabs(delta)); + if (steps > std::fabs(delta) * pi) + steps = std::fabs(delta) * pi; // ie excessive precision check + m_sin = std::sin(two_pi / steps); + m_cos = std::cos(two_pi / steps); + m_StepsPerRad = steps / two_pi; + if (delta < 0.0) + m_sin = -m_sin; + + m_destPolys.reserve(m_polyNodes.ChildCount() * 2); + for (int i = 0; i < m_polyNodes.ChildCount(); i++) { + PolyNode &node = *m_polyNodes.Childs[i]; + m_srcPoly = node.Contour; + + int len = (int)m_srcPoly.size(); + if (len == 0 || + (delta <= 0 && (len < 3 || node.m_endtype != etClosedPolygon))) + continue; + + m_destPoly.clear(); + if (len == 1) { + if (node.m_jointype == jtRound) { + double X = 1.0, Y = 0.0; + for (cInt j = 1; j <= steps; j++) { + m_destPoly.push_back(IntPoint(Round(m_srcPoly[0].X + X * delta), + Round(m_srcPoly[0].Y + Y * delta))); + double X2 = X; + X = X * m_cos - m_sin * Y; + Y = X2 * m_sin + Y * m_cos; + } + } else { + double X = -1.0, Y = -1.0; + for (int j = 0; j < 4; ++j) { + m_destPoly.push_back(IntPoint(Round(m_srcPoly[0].X + X * delta), + Round(m_srcPoly[0].Y + Y * delta))); + if (X < 0) + X = 1; + else if (Y < 0) + Y = 1; + else + X = -1; + } + } + m_destPolys.push_back(m_destPoly); + continue; + } + // build m_normals ... 
+ m_normals.clear(); + m_normals.reserve(len); + for (int j = 0; j < len - 1; ++j) + m_normals.push_back(GetUnitNormal(m_srcPoly[j], m_srcPoly[j + 1])); + if (node.m_endtype == etClosedLine || node.m_endtype == etClosedPolygon) + m_normals.push_back(GetUnitNormal(m_srcPoly[len - 1], m_srcPoly[0])); + else + m_normals.push_back(DoublePoint(m_normals[len - 2])); + + if (node.m_endtype == etClosedPolygon) { + int k = len - 1; + for (int j = 0; j < len; ++j) + OffsetPoint(j, k, node.m_jointype); + m_destPolys.push_back(m_destPoly); + } else if (node.m_endtype == etClosedLine) { + int k = len - 1; + for (int j = 0; j < len; ++j) + OffsetPoint(j, k, node.m_jointype); + m_destPolys.push_back(m_destPoly); + m_destPoly.clear(); + // re-build m_normals ... + DoublePoint n = m_normals[len - 1]; + for (int j = len - 1; j > 0; j--) + m_normals[j] = DoublePoint(-m_normals[j - 1].X, -m_normals[j - 1].Y); + m_normals[0] = DoublePoint(-n.X, -n.Y); + k = 0; + for (int j = len - 1; j >= 0; j--) + OffsetPoint(j, k, node.m_jointype); + m_destPolys.push_back(m_destPoly); + } else { + int k = 0; + for (int j = 1; j < len - 1; ++j) + OffsetPoint(j, k, node.m_jointype); + + IntPoint pt1; + if (node.m_endtype == etOpenButt) { + int j = len - 1; + pt1 = IntPoint((cInt)Round(m_srcPoly[j].X + m_normals[j].X * delta), + (cInt)Round(m_srcPoly[j].Y + m_normals[j].Y * delta)); + m_destPoly.push_back(pt1); + pt1 = IntPoint((cInt)Round(m_srcPoly[j].X - m_normals[j].X * delta), + (cInt)Round(m_srcPoly[j].Y - m_normals[j].Y * delta)); + m_destPoly.push_back(pt1); + } else { + int j = len - 1; + k = len - 2; + m_sinA = 0; + m_normals[j] = DoublePoint(-m_normals[j].X, -m_normals[j].Y); + if (node.m_endtype == etOpenSquare) + DoSquare(j, k); + else + DoRound(j, k); + } + + // re-build m_normals ... + for (int j = len - 1; j > 0; j--) + m_normals[j] = DoublePoint(-m_normals[j - 1].X, -m_normals[j - 1].Y); + m_normals[0] = DoublePoint(-m_normals[1].X, -m_normals[1].Y); + + k = len - 1; + for (int j = k - 1; j > 0; --j) + OffsetPoint(j, k, node.m_jointype); + + if (node.m_endtype == etOpenButt) { + pt1 = IntPoint((cInt)Round(m_srcPoly[0].X - m_normals[0].X * delta), + (cInt)Round(m_srcPoly[0].Y - m_normals[0].Y * delta)); + m_destPoly.push_back(pt1); + pt1 = IntPoint((cInt)Round(m_srcPoly[0].X + m_normals[0].X * delta), + (cInt)Round(m_srcPoly[0].Y + m_normals[0].Y * delta)); + m_destPoly.push_back(pt1); + } else { + k = 1; + m_sinA = 0; + if (node.m_endtype == etOpenSquare) + DoSquare(0, 1); + else + DoRound(0, 1); + } + m_destPolys.push_back(m_destPoly); + } + } +} +//------------------------------------------------------------------------------ + +void ClipperOffset::OffsetPoint(int j, int &k, JoinType jointype) { + // cross product ... + m_sinA = (m_normals[k].X * m_normals[j].Y - m_normals[j].X * m_normals[k].Y); + if (std::fabs(m_sinA * m_delta) < 1.0) { + // dot product ... 
+ double cosA = + (m_normals[k].X * m_normals[j].X + m_normals[j].Y * m_normals[k].Y); + if (cosA > 0) // angle => 0 degrees + { + m_destPoly.push_back( + IntPoint(Round(m_srcPoly[j].X + m_normals[k].X * m_delta), + Round(m_srcPoly[j].Y + m_normals[k].Y * m_delta))); + return; + } + // else angle => 180 degrees + } else if (m_sinA > 1.0) + m_sinA = 1.0; + else if (m_sinA < -1.0) + m_sinA = -1.0; + + if (m_sinA * m_delta < 0) { + m_destPoly.push_back( + IntPoint(Round(m_srcPoly[j].X + m_normals[k].X * m_delta), + Round(m_srcPoly[j].Y + m_normals[k].Y * m_delta))); + m_destPoly.push_back(m_srcPoly[j]); + m_destPoly.push_back( + IntPoint(Round(m_srcPoly[j].X + m_normals[j].X * m_delta), + Round(m_srcPoly[j].Y + m_normals[j].Y * m_delta))); + } else + switch (jointype) { + case jtMiter: { + double r = 1 + (m_normals[j].X * m_normals[k].X + + m_normals[j].Y * m_normals[k].Y); + if (r >= m_miterLim) + DoMiter(j, k, r); + else + DoSquare(j, k); + break; + } + case jtSquare: + DoSquare(j, k); + break; + case jtRound: + DoRound(j, k); + break; + } + k = j; +} +//------------------------------------------------------------------------------ + +void ClipperOffset::DoSquare(int j, int k) { + double dx = std::tan(std::atan2(m_sinA, m_normals[k].X * m_normals[j].X + + m_normals[k].Y * m_normals[j].Y) / + 4); + m_destPoly.push_back(IntPoint( + Round(m_srcPoly[j].X + m_delta * (m_normals[k].X - m_normals[k].Y * dx)), + Round(m_srcPoly[j].Y + + m_delta * (m_normals[k].Y + m_normals[k].X * dx)))); + m_destPoly.push_back(IntPoint( + Round(m_srcPoly[j].X + m_delta * (m_normals[j].X + m_normals[j].Y * dx)), + Round(m_srcPoly[j].Y + + m_delta * (m_normals[j].Y - m_normals[j].X * dx)))); +} +//------------------------------------------------------------------------------ + +void ClipperOffset::DoMiter(int j, int k, double r) { + double q = m_delta / r; + m_destPoly.push_back( + IntPoint(Round(m_srcPoly[j].X + (m_normals[k].X + m_normals[j].X) * q), + Round(m_srcPoly[j].Y + (m_normals[k].Y + m_normals[j].Y) * q))); +} +//------------------------------------------------------------------------------ + +void ClipperOffset::DoRound(int j, int k) { + double a = std::atan2(m_sinA, m_normals[k].X * m_normals[j].X + + m_normals[k].Y * m_normals[j].Y); + int steps = std::max((int)Round(m_StepsPerRad * std::fabs(a)), 1); + + double X = m_normals[k].X, Y = m_normals[k].Y, X2; + for (int i = 0; i < steps; ++i) { + m_destPoly.push_back(IntPoint(Round(m_srcPoly[j].X + X * m_delta), + Round(m_srcPoly[j].Y + Y * m_delta))); + X2 = X; + X = X * m_cos - m_sin * Y; + Y = X2 * m_sin + Y * m_cos; + } + m_destPoly.push_back( + IntPoint(Round(m_srcPoly[j].X + m_normals[j].X * m_delta), + Round(m_srcPoly[j].Y + m_normals[j].Y * m_delta))); +} + +//------------------------------------------------------------------------------ +// Miscellaneous public functions +//------------------------------------------------------------------------------ + +void Clipper::DoSimplePolygons() { + PolyOutList::size_type i = 0; + while (i < m_PolyOuts.size()) { + OutRec *outrec = m_PolyOuts[i++]; + OutPt *op = outrec->Pts; + if (!op || outrec->IsOpen) + continue; + do // for each Pt in Polygon until duplicate found do ... + { + OutPt *op2 = op->Next; + while (op2 != outrec->Pts) { + if ((op->Pt == op2->Pt) && op2->Next != op && op2->Prev != op) { + // split the polygon into two ... 
+ OutPt *op3 = op->Prev; + OutPt *op4 = op2->Prev; + op->Prev = op4; + op4->Next = op; + op2->Prev = op3; + op3->Next = op2; + + outrec->Pts = op; + OutRec *outrec2 = CreateOutRec(); + outrec2->Pts = op2; + UpdateOutPtIdxs(*outrec2); + if (Poly2ContainsPoly1(outrec2->Pts, outrec->Pts)) { + // OutRec2 is contained by OutRec1 ... + outrec2->IsHole = !outrec->IsHole; + outrec2->FirstLeft = outrec; + if (m_UsingPolyTree) + FixupFirstLefts2(outrec2, outrec); + } else if (Poly2ContainsPoly1(outrec->Pts, outrec2->Pts)) { + // OutRec1 is contained by OutRec2 ... + outrec2->IsHole = outrec->IsHole; + outrec->IsHole = !outrec2->IsHole; + outrec2->FirstLeft = outrec->FirstLeft; + outrec->FirstLeft = outrec2; + if (m_UsingPolyTree) + FixupFirstLefts2(outrec, outrec2); + } else { + // the 2 polygons are separate ... + outrec2->IsHole = outrec->IsHole; + outrec2->FirstLeft = outrec->FirstLeft; + if (m_UsingPolyTree) + FixupFirstLefts1(outrec, outrec2); + } + op2 = op; // ie get ready for the Next iteration + } + op2 = op2->Next; + } + op = op->Next; + } while (op != outrec->Pts); + } +} +//------------------------------------------------------------------------------ + +void ReversePath(Path &p) { std::reverse(p.begin(), p.end()); } +//------------------------------------------------------------------------------ + +void ReversePaths(Paths &p) { + for (Paths::size_type i = 0; i < p.size(); ++i) + ReversePath(p[i]); +} +//------------------------------------------------------------------------------ + +void SimplifyPolygon(const Path &in_poly, Paths &out_polys, + PolyFillType fillType) { + Clipper c; + c.StrictlySimple(true); + c.AddPath(in_poly, ptSubject, true); + c.Execute(ctUnion, out_polys, fillType, fillType); +} +//------------------------------------------------------------------------------ + +void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, + PolyFillType fillType) { + Clipper c; + c.StrictlySimple(true); + c.AddPaths(in_polys, ptSubject, true); + c.Execute(ctUnion, out_polys, fillType, fillType); +} +//------------------------------------------------------------------------------ + +void SimplifyPolygons(Paths &polys, PolyFillType fillType) { + SimplifyPolygons(polys, polys, fillType); +} +//------------------------------------------------------------------------------ + +inline double DistanceSqrd(const IntPoint &pt1, const IntPoint &pt2) { + double Dx = ((double)pt1.X - pt2.X); + double dy = ((double)pt1.Y - pt2.Y); + return (Dx * Dx + dy * dy); +} +//------------------------------------------------------------------------------ + +double DistanceFromLineSqrd(const IntPoint &pt, const IntPoint &ln1, + const IntPoint &ln2) { + // The equation of a line in general form (Ax + By + C = 0) + // given 2 points (x�,y�) & (x�,y�) is ... 
+ //(y� - y�)x + (x� - x�)y + (y� - y�)x� - (x� - x�)y� = 0 + // A = (y� - y�); B = (x� - x�); C = (y� - y�)x� - (x� - x�)y� + // perpendicular distance of point (x�,y�) = (Ax� + By� + C)/Sqrt(A� + B�) + // see http://en.wikipedia.org/wiki/Perpendicular_distance + double A = double(ln1.Y - ln2.Y); + double B = double(ln2.X - ln1.X); + double C = A * ln1.X + B * ln1.Y; + C = A * pt.X + B * pt.Y - C; + return (C * C) / (A * A + B * B); +} +//--------------------------------------------------------------------------- + +bool SlopesNearCollinear(const IntPoint &pt1, const IntPoint &pt2, + const IntPoint &pt3, double distSqrd) { + // this function is more accurate when the point that's geometrically + // between the other 2 points is the one that's tested for distance. + // ie makes it more likely to pick up 'spikes' ... + if (Abs(pt1.X - pt2.X) > Abs(pt1.Y - pt2.Y)) { + if ((pt1.X > pt2.X) == (pt1.X < pt3.X)) + return DistanceFromLineSqrd(pt1, pt2, pt3) < distSqrd; + else if ((pt2.X > pt1.X) == (pt2.X < pt3.X)) + return DistanceFromLineSqrd(pt2, pt1, pt3) < distSqrd; + else + return DistanceFromLineSqrd(pt3, pt1, pt2) < distSqrd; + } else { + if ((pt1.Y > pt2.Y) == (pt1.Y < pt3.Y)) + return DistanceFromLineSqrd(pt1, pt2, pt3) < distSqrd; + else if ((pt2.Y > pt1.Y) == (pt2.Y < pt3.Y)) + return DistanceFromLineSqrd(pt2, pt1, pt3) < distSqrd; + else + return DistanceFromLineSqrd(pt3, pt1, pt2) < distSqrd; + } +} +//------------------------------------------------------------------------------ + +bool PointsAreClose(IntPoint pt1, IntPoint pt2, double distSqrd) { + double Dx = (double)pt1.X - pt2.X; + double dy = (double)pt1.Y - pt2.Y; + return ((Dx * Dx) + (dy * dy) <= distSqrd); +} +//------------------------------------------------------------------------------ + +OutPt *ExcludeOp(OutPt *op) { + OutPt *result = op->Prev; + result->Next = op->Next; + op->Next->Prev = result; + result->Idx = 0; + return result; +} +//------------------------------------------------------------------------------ + +void CleanPolygon(const Path &in_poly, Path &out_poly, double distance) { + // distance = proximity in units/pixels below which vertices + // will be stripped. Default ~= sqrt(2). 
+ + size_t size = in_poly.size(); + + if (size == 0) { + out_poly.clear(); + return; + } + + OutPt *outPts = new OutPt[size]; + for (size_t i = 0; i < size; ++i) { + outPts[i].Pt = in_poly[i]; + outPts[i].Next = &outPts[(i + 1) % size]; + outPts[i].Next->Prev = &outPts[i]; + outPts[i].Idx = 0; + } + + double distSqrd = distance * distance; + OutPt *op = &outPts[0]; + while (op->Idx == 0 && op->Next != op->Prev) { + if (PointsAreClose(op->Pt, op->Prev->Pt, distSqrd)) { + op = ExcludeOp(op); + size--; + } else if (PointsAreClose(op->Prev->Pt, op->Next->Pt, distSqrd)) { + ExcludeOp(op->Next); + op = ExcludeOp(op); + size -= 2; + } else if (SlopesNearCollinear(op->Prev->Pt, op->Pt, op->Next->Pt, + distSqrd)) { + op = ExcludeOp(op); + size--; + } else { + op->Idx = 1; + op = op->Next; + } + } + + if (size < 3) + size = 0; + out_poly.resize(size); + for (size_t i = 0; i < size; ++i) { + out_poly[i] = op->Pt; + op = op->Next; + } + delete[] outPts; +} +//------------------------------------------------------------------------------ + +void CleanPolygon(Path &poly, double distance) { + CleanPolygon(poly, poly, distance); +} +//------------------------------------------------------------------------------ + +void CleanPolygons(const Paths &in_polys, Paths &out_polys, double distance) { + out_polys.resize(in_polys.size()); + for (Paths::size_type i = 0; i < in_polys.size(); ++i) + CleanPolygon(in_polys[i], out_polys[i], distance); +} +//------------------------------------------------------------------------------ + +void CleanPolygons(Paths &polys, double distance) { + CleanPolygons(polys, polys, distance); +} +//------------------------------------------------------------------------------ + +void Minkowski(const Path &poly, const Path &path, Paths &solution, bool isSum, + bool isClosed) { + int delta = (isClosed ? 
1 : 0); + size_t polyCnt = poly.size(); + size_t pathCnt = path.size(); + Paths pp; + pp.reserve(pathCnt); + if (isSum) + for (size_t i = 0; i < pathCnt; ++i) { + Path p; + p.reserve(polyCnt); + for (size_t j = 0; j < poly.size(); ++j) + p.push_back(IntPoint(path[i].X + poly[j].X, path[i].Y + poly[j].Y)); + pp.push_back(p); + } + else + for (size_t i = 0; i < pathCnt; ++i) { + Path p; + p.reserve(polyCnt); + for (size_t j = 0; j < poly.size(); ++j) + p.push_back(IntPoint(path[i].X - poly[j].X, path[i].Y - poly[j].Y)); + pp.push_back(p); + } + + solution.clear(); + solution.reserve((pathCnt + delta) * (polyCnt + 1)); + for (size_t i = 0; i < pathCnt - 1 + delta; ++i) + for (size_t j = 0; j < polyCnt; ++j) { + Path quad; + quad.reserve(4); + quad.push_back(pp[i % pathCnt][j % polyCnt]); + quad.push_back(pp[(i + 1) % pathCnt][j % polyCnt]); + quad.push_back(pp[(i + 1) % pathCnt][(j + 1) % polyCnt]); + quad.push_back(pp[i % pathCnt][(j + 1) % polyCnt]); + if (!Orientation(quad)) + ReversePath(quad); + solution.push_back(quad); + } +} +//------------------------------------------------------------------------------ + +void MinkowskiSum(const Path &pattern, const Path &path, Paths &solution, + bool pathIsClosed) { + Minkowski(pattern, path, solution, true, pathIsClosed); + Clipper c; + c.AddPaths(solution, ptSubject, true); + c.Execute(ctUnion, solution, pftNonZero, pftNonZero); +} +//------------------------------------------------------------------------------ + +void TranslatePath(const Path &input, Path &output, const IntPoint delta) { + // precondition: input != output + output.resize(input.size()); + for (size_t i = 0; i < input.size(); ++i) + output[i] = IntPoint(input[i].X + delta.X, input[i].Y + delta.Y); +} +//------------------------------------------------------------------------------ + +void MinkowskiSum(const Path &pattern, const Paths &paths, Paths &solution, + bool pathIsClosed) { + Clipper c; + for (size_t i = 0; i < paths.size(); ++i) { + Paths tmp; + Minkowski(pattern, paths[i], tmp, true, pathIsClosed); + c.AddPaths(tmp, ptSubject, true); + if (pathIsClosed) { + Path tmp2; + TranslatePath(paths[i], tmp2, pattern[0]); + c.AddPath(tmp2, ptClip, true); + } + } + c.Execute(ctUnion, solution, pftNonZero, pftNonZero); +} +//------------------------------------------------------------------------------ + +void MinkowskiDiff(const Path &poly1, const Path &poly2, Paths &solution) { + Minkowski(poly1, poly2, solution, false, true); + Clipper c; + c.AddPaths(solution, ptSubject, true); + c.Execute(ctUnion, solution, pftNonZero, pftNonZero); +} +//------------------------------------------------------------------------------ + +enum NodeType { ntAny, ntOpen, ntClosed }; + +void AddPolyNodeToPaths(const PolyNode &polynode, NodeType nodetype, + Paths &paths) { + bool match = true; + if (nodetype == ntClosed) + match = !polynode.IsOpen(); + else if (nodetype == ntOpen) + return; + + if (!polynode.Contour.empty() && match) + paths.push_back(polynode.Contour); + for (int i = 0; i < polynode.ChildCount(); ++i) + AddPolyNodeToPaths(*polynode.Childs[i], nodetype, paths); +} +//------------------------------------------------------------------------------ + +void PolyTreeToPaths(const PolyTree &polytree, Paths &paths) { + paths.resize(0); + paths.reserve(polytree.Total()); + AddPolyNodeToPaths(polytree, ntAny, paths); +} +//------------------------------------------------------------------------------ + +void ClosedPathsFromPolyTree(const PolyTree &polytree, Paths &paths) { + paths.resize(0); + 
paths.reserve(polytree.Total()); + AddPolyNodeToPaths(polytree, ntClosed, paths); +} +//------------------------------------------------------------------------------ + +void OpenPathsFromPolyTree(PolyTree &polytree, Paths &paths) { + paths.resize(0); + paths.reserve(polytree.Total()); + // Open paths are top level only, so ... + for (int i = 0; i < polytree.ChildCount(); ++i) + if (polytree.Childs[i]->IsOpen()) + paths.push_back(polytree.Childs[i]->Contour); +} +//------------------------------------------------------------------------------ + +std::ostream &operator<<(std::ostream &s, const IntPoint &p) { + s << "(" << p.X << "," << p.Y << ")"; + return s; +} +//------------------------------------------------------------------------------ + +std::ostream &operator<<(std::ostream &s, const Path &p) { + if (p.empty()) + return s; + Path::size_type last = p.size() - 1; + for (Path::size_type i = 0; i < last; i++) + s << "(" << p[i].X << "," << p[i].Y << "), "; + s << "(" << p[last].X << "," << p[last].Y << ")\n"; + return s; +} +//------------------------------------------------------------------------------ + +std::ostream &operator<<(std::ostream &s, const Paths &p) { + for (Paths::size_type i = 0; i < p.size(); i++) + s << p[i]; + s << "\n"; + return s; +} +//------------------------------------------------------------------------------ + +} // namespace ClipperLib diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.h new file mode 100755 index 0000000000..d19e95ca2c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/clipper.h @@ -0,0 +1,421 @@ +/******************************************************************************* + * * + * Author : Angus Johnson * Version : 6.4.2 * Date : 27 February + *2017 * Website : + *http://www.angusj.com * Copyright : + *Angus Johnson 2010-2017 * + * * + * License: * Use, modification & distribution is subject to Boost Software + *License Ver 1. * http://www.boost.org/LICENSE_1_0.txt * + * * + * Attributions: * The code in this library is an extension of Bala Vatti's + *clipping algorithm: * "A generic solution to polygon clipping" * + * Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. * + * http://portal.acm.org/citation.cfm?id=129906 * + * * + * Computer graphics and geometric modeling: implementation and algorithms * By + *Max K. Agoston * + * Springer; 1 edition (January 4, 2005) * + * http://books.google.com/books?q=vatti+clipping+agoston * + * * + * See also: * "Polygon Offsetting by Computing Winding Numbers" * Paper no. + *DETC2005-85513 pp. 565-575 * ASME 2005 + *International Design Engineering Technical Conferences * and + *Computers and Information in Engineering Conference (IDETC/CIE2005) * + * September 24-28, 2005 , Long Beach, California, USA * + * http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf * + * * + *******************************************************************************/ + +#pragma once + +#ifndef clipper_hpp +#define clipper_hpp + +#define CLIPPER_VERSION "6.4.2" + +// use_int32: When enabled 32bit ints are used instead of 64bit ints. This +// improve performance but coordinate values are limited to the range +/- 46340 +//#define use_int32 + +// use_xyz: adds a Z member to IntPoint. Adds a minor cost to perfomance. +//#define use_xyz + +// use_lines: Enables line clipping. Adds a very minor cost to performance. 
+#define use_lines + +// use_deprecated: Enables temporary support for the obsolete functions +//#define use_deprecated + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ClipperLib { + +enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor }; +enum PolyType { ptSubject, ptClip }; +// By far the most widely used winding rules for polygon filling are +// EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32) +// Others rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL) +// see http://glprogramming.com/red/chapter11.html +enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative }; + +#ifdef use_int32 +typedef int cInt; +static cInt const loRange = 0x7FFF; +static cInt const hiRange = 0x7FFF; +#else +typedef signed long long cInt; +static cInt const loRange = 0x3FFFFFFF; +static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL; +typedef signed long long long64; // used by Int128 class +typedef unsigned long long ulong64; + +#endif + +struct IntPoint { + cInt X; + cInt Y; +#ifdef use_xyz + cInt Z; + IntPoint(cInt x = 0, cInt y = 0, cInt z = 0) : X(x), Y(y), Z(z){}; +#else + IntPoint(cInt x = 0, cInt y = 0) : X(x), Y(y){}; +#endif + + friend inline bool operator==(const IntPoint &a, const IntPoint &b) { + return a.X == b.X && a.Y == b.Y; + } + friend inline bool operator!=(const IntPoint &a, const IntPoint &b) { + return a.X != b.X || a.Y != b.Y; + } +}; +//------------------------------------------------------------------------------ + +typedef std::vector Path; +typedef std::vector Paths; + +inline Path &operator<<(Path &poly, const IntPoint &p) { + poly.push_back(p); + return poly; +} +inline Paths &operator<<(Paths &polys, const Path &p) { + polys.push_back(p); + return polys; +} + +std::ostream &operator<<(std::ostream &s, const IntPoint &p); +std::ostream &operator<<(std::ostream &s, const Path &p); +std::ostream &operator<<(std::ostream &s, const Paths &p); + +struct DoublePoint { + double X; + double Y; + DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {} + DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {} +}; +//------------------------------------------------------------------------------ + +#ifdef use_xyz +typedef void (*ZFillCallback)(IntPoint &e1bot, IntPoint &e1top, IntPoint &e2bot, + IntPoint &e2top, IntPoint &pt); +#endif + +enum InitOptions { + ioReverseSolution = 1, + ioStrictlySimple = 2, + ioPreserveCollinear = 4 +}; +enum JoinType { jtSquare, jtRound, jtMiter }; +enum EndType { + etClosedPolygon, + etClosedLine, + etOpenButt, + etOpenSquare, + etOpenRound +}; + +class PolyNode; +typedef std::vector PolyNodes; + +class PolyNode { +public: + PolyNode(); + virtual ~PolyNode(){}; + Path Contour; + PolyNodes Childs; + PolyNode *Parent; + PolyNode *GetNext() const; + bool IsHole() const; + bool IsOpen() const; + int ChildCount() const; + +private: + // PolyNode& operator =(PolyNode& other); + unsigned Index; // node index in Parent.Childs + bool m_IsOpen; + JoinType m_jointype; + EndType m_endtype; + PolyNode *GetNextSiblingUp() const; + void AddChild(PolyNode &child); + friend class Clipper; // to access Index + friend class ClipperOffset; +}; + +class PolyTree : public PolyNode { +public: + ~PolyTree() { Clear(); }; + PolyNode *GetFirst() const; + void Clear(); + int Total() const; + +private: + // PolyTree& operator =(PolyTree& other); + PolyNodes AllNodes; + friend class Clipper; // to access AllNodes +}; + +bool Orientation(const Path &poly); +double 
Area(const Path &poly); +int PointInPolygon(const IntPoint &pt, const Path &path); + +void SimplifyPolygon(const Path &in_poly, Paths &out_polys, + PolyFillType fillType = pftEvenOdd); +void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, + PolyFillType fillType = pftEvenOdd); +void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd); + +void CleanPolygon(const Path &in_poly, Path &out_poly, double distance = 1.415); +void CleanPolygon(Path &poly, double distance = 1.415); +void CleanPolygons(const Paths &in_polys, Paths &out_polys, + double distance = 1.415); +void CleanPolygons(Paths &polys, double distance = 1.415); + +void MinkowskiSum(const Path &pattern, const Path &path, Paths &solution, + bool pathIsClosed); +void MinkowskiSum(const Path &pattern, const Paths &paths, Paths &solution, + bool pathIsClosed); +void MinkowskiDiff(const Path &poly1, const Path &poly2, Paths &solution); + +void PolyTreeToPaths(const PolyTree &polytree, Paths &paths); +void ClosedPathsFromPolyTree(const PolyTree &polytree, Paths &paths); +void OpenPathsFromPolyTree(PolyTree &polytree, Paths &paths); + +void ReversePath(Path &p); +void ReversePaths(Paths &p); + +struct IntRect { + cInt left; + cInt top; + cInt right; + cInt bottom; +}; + +// enums that are used internally ... +enum EdgeSide { esLeft = 1, esRight = 2 }; + +// forward declarations (for stuff used internally) ... +struct TEdge; +struct IntersectNode; +struct LocalMinimum; +struct OutPt; +struct OutRec; +struct Join; + +typedef std::vector PolyOutList; +typedef std::vector EdgeList; +typedef std::vector JoinList; +typedef std::vector IntersectList; + +//------------------------------------------------------------------------------ + +// ClipperBase is the ancestor to the Clipper class. It should not be +// instantiated directly. This class simply abstracts the conversion of sets of +// polygon coordinates into edge objects that are stored in a LocalMinima list. 
+class ClipperBase { +public: + ClipperBase(); + virtual ~ClipperBase(); + virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed); + bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed); + virtual void Clear(); + IntRect GetBounds(); + bool PreserveCollinear() { return m_PreserveCollinear; }; + void PreserveCollinear(bool value) { m_PreserveCollinear = value; }; + +protected: + void DisposeLocalMinimaList(); + TEdge *AddBoundsToLML(TEdge *e, bool IsClosed); + virtual void Reset(); + TEdge *ProcessBound(TEdge *E, bool IsClockwise); + void InsertScanbeam(const cInt Y); + bool PopScanbeam(cInt &Y); + bool LocalMinimaPending(); + bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin); + OutRec *CreateOutRec(); + void DisposeAllOutRecs(); + void DisposeOutRec(PolyOutList::size_type index); + void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2); + void DeleteFromAEL(TEdge *e); + void UpdateEdgeIntoAEL(TEdge *&e); + + typedef std::vector MinimaList; + MinimaList::iterator m_CurrentLM; + MinimaList m_MinimaList; + + bool m_UseFullRange; + EdgeList m_edges; + bool m_PreserveCollinear; + bool m_HasOpenPaths; + PolyOutList m_PolyOuts; + TEdge *m_ActiveEdges; + + typedef std::priority_queue ScanbeamList; + ScanbeamList m_Scanbeam; +}; +//------------------------------------------------------------------------------ + +class Clipper : public virtual ClipperBase { +public: + Clipper(int initOptions = 0); + bool Execute(ClipType clipType, Paths &solution, + PolyFillType fillType = pftEvenOdd); + bool Execute(ClipType clipType, Paths &solution, PolyFillType subjFillType, + PolyFillType clipFillType); + bool Execute(ClipType clipType, PolyTree &polytree, + PolyFillType fillType = pftEvenOdd); + bool Execute(ClipType clipType, PolyTree &polytree, PolyFillType subjFillType, + PolyFillType clipFillType); + bool ReverseSolution() { return m_ReverseOutput; }; + void ReverseSolution(bool value) { m_ReverseOutput = value; }; + bool StrictlySimple() { return m_StrictSimple; }; + void StrictlySimple(bool value) { m_StrictSimple = value; }; +// set the callback function for z value filling on intersections (otherwise Z +// is 0) +#ifdef use_xyz + void ZFillFunction(ZFillCallback zFillFunc); +#endif +protected: + virtual bool ExecuteInternal(); + +private: + JoinList m_Joins; + JoinList m_GhostJoins; + IntersectList m_IntersectList; + ClipType m_ClipType; + typedef std::list MaximaList; + MaximaList m_Maxima; + TEdge *m_SortedEdges; + bool m_ExecuteLocked; + PolyFillType m_ClipFillType; + PolyFillType m_SubjFillType; + bool m_ReverseOutput; + bool m_UsingPolyTree; + bool m_StrictSimple; +#ifdef use_xyz + ZFillCallback m_ZFill; // custom callback +#endif + void SetWindingCount(TEdge &edge); + bool IsEvenOddFillType(const TEdge &edge) const; + bool IsEvenOddAltFillType(const TEdge &edge) const; + void InsertLocalMinimaIntoAEL(const cInt botY); + void InsertEdgeIntoAEL(TEdge *edge, TEdge *startEdge); + void AddEdgeToSEL(TEdge *edge); + bool PopEdgeFromSEL(TEdge *&edge); + void CopyAELToSEL(); + void DeleteFromSEL(TEdge *e); + void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2); + bool IsContributing(const TEdge &edge) const; + bool IsTopHorz(const cInt XPos); + void DoMaxima(TEdge *e); + void ProcessHorizontals(); + void ProcessHorizontal(TEdge *horzEdge); + void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt); + OutPt *AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt); + OutRec *GetOutRec(int idx); + void AppendPolygon(TEdge *e1, TEdge *e2); + void IntersectEdges(TEdge *e1, 
TEdge *e2, IntPoint &pt); + OutPt *AddOutPt(TEdge *e, const IntPoint &pt); + OutPt *GetLastOutPt(TEdge *e); + bool ProcessIntersections(const cInt topY); + void BuildIntersectList(const cInt topY); + void ProcessIntersectList(); + void ProcessEdgesAtTopOfScanbeam(const cInt topY); + void BuildResult(Paths &polys); + void BuildResult2(PolyTree &polytree); + void SetHoleState(TEdge *e, OutRec *outrec); + void DisposeIntersectNodes(); + bool FixupIntersectionOrder(); + void FixupOutPolygon(OutRec &outrec); + void FixupOutPolyline(OutRec &outrec); + bool IsHole(TEdge *e); + bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl); + void FixHoleLinkage(OutRec &outrec); + void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt); + void ClearJoins(); + void ClearGhostJoins(); + void AddGhostJoin(OutPt *op, const IntPoint offPt); + bool JoinPoints(Join *j, OutRec *outRec1, OutRec *outRec2); + void JoinCommonEdges(); + void DoSimplePolygons(); + void FixupFirstLefts1(OutRec *OldOutRec, OutRec *NewOutRec); + void FixupFirstLefts2(OutRec *InnerOutRec, OutRec *OuterOutRec); + void FixupFirstLefts3(OutRec *OldOutRec, OutRec *NewOutRec); +#ifdef use_xyz + void SetZ(IntPoint &pt, TEdge &e1, TEdge &e2); +#endif +}; +//------------------------------------------------------------------------------ + +class ClipperOffset { +public: + ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25); + ~ClipperOffset(); + void AddPath(const Path &path, JoinType joinType, EndType endType); + void AddPaths(const Paths &paths, JoinType joinType, EndType endType); + void Execute(Paths &solution, double delta); + void Execute(PolyTree &solution, double delta); + void Clear(); + double MiterLimit; + double ArcTolerance; + +private: + Paths m_destPolys; + Path m_srcPoly; + Path m_destPoly; + std::vector m_normals; + double m_delta, m_sinA, m_sin, m_cos; + double m_miterLim, m_StepsPerRad; + IntPoint m_lowest; + PolyNode m_polyNodes; + + void FixOrientations(); + void DoOffset(double delta); + void OffsetPoint(int j, int &k, JoinType jointype); + void DoSquare(int j, int k); + void DoMiter(int j, int k, double r); + void DoRound(int j, int k); +}; +//------------------------------------------------------------------------------ + +class clipperException : public std::exception { +public: + clipperException(const char *description) : m_descr(description) {} + virtual ~clipperException() throw() {} + virtual const char *what() const throw() { return m_descr.c_str(); } + +private: + std::string m_descr; +}; +//------------------------------------------------------------------------------ + +} // namespace ClipperLib + +#endif // clipper_hpp diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/get_rotate_crop_image.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/get_rotate_crop_image.cc new file mode 100755 index 0000000000..8099821056 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/get_rotate_crop_image.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +cv::Mat GetRotateCropImage(const cv::Mat &srcimage, + const std::array &box) { + cv::Mat image; + srcimage.copyTo(image); + + std::vector> points; + + for (int i = 0; i < 4; ++i) { + std::vector tmp; + tmp.push_back(box[2 * i]); + tmp.push_back(box[2 * i + 1]); + points.push_back(tmp); + } + int x_collect[4] = {box[0], box[2], box[4], box[6]}; + int y_collect[4] = {box[1], box[3], box[5], box[7]}; + int left = int(*std::min_element(x_collect, x_collect + 4)); + int right = int(*std::max_element(x_collect, x_collect + 4)); + int top = int(*std::min_element(y_collect, y_collect + 4)); + int bottom = int(*std::max_element(y_collect, y_collect + 4)); + + cv::Mat img_crop; + image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop); + + for (int i = 0; i < points.size(); i++) { + points[i][0] -= left; + points[i][1] -= top; + } + + int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) + + pow(points[0][1] - points[1][1], 2))); + int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) + + pow(points[0][1] - points[3][1], 2))); + + cv::Point2f pts_std[4]; + pts_std[0] = cv::Point2f(0., 0.); + pts_std[1] = cv::Point2f(img_crop_width, 0.); + pts_std[2] = cv::Point2f(img_crop_width, img_crop_height); + pts_std[3] = cv::Point2f(0.f, img_crop_height); + + cv::Point2f pointsf[4]; + pointsf[0] = cv::Point2f(points[0][0], points[0][1]); + pointsf[1] = cv::Point2f(points[1][0], points[1][1]); + pointsf[2] = cv::Point2f(points[2][0], points[2][1]); + pointsf[3] = cv::Point2f(points[3][0], points[3][1]); + + cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std); + + cv::Mat dst_img; + cv::warpPerspective(img_crop, dst_img, M, + cv::Size(img_crop_width, img_crop_height), + cv::BORDER_REPLICATE); + + if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) { + cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth()); + cv::transpose(dst_img, srcCopy); + cv::flip(srcCopy, srcCopy, 0); + return srcCopy; + } else { + return dst_img; + } +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/matcher.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/matcher.cc new file mode 100755 index 0000000000..d8de7e6e54 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/matcher.cc @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +std::vector Xyxyxyxy2Xyxy(std::array &box) { + int x_collect[4] = {box[0], box[2], box[4], box[6]}; + int y_collect[4] = {box[1], box[3], box[5], box[7]}; + int left = int(*std::min_element(x_collect, x_collect + 4)); + int right = int(*std::max_element(x_collect, x_collect + 4)); + int top = int(*std::min_element(y_collect, y_collect + 4)); + int bottom = int(*std::max_element(y_collect, y_collect + 4)); + std::vector box1(4, 0); + box1[0] = left; + box1[1] = top; + box1[2] = right; + box1[3] = bottom; + return box1; +} + +float Dis(std::vector &box1, std::vector &box2) { + float x1_1 = float(box1[0]); + float y1_1 = float(box1[1]); + float x2_1 = float(box1[2]); + float y2_1 = float(box1[3]); + + float x1_2 = float(box2[0]); + float y1_2 = float(box2[1]); + float x2_2 = float(box2[2]); + float y2_2 = float(box2[3]); + + float dis = std::abs(x1_2 - x1_1) + std::abs(y1_2 - y1_1) + + std::abs(x2_2 - x2_1) + std::abs(y2_2 - y2_1); + float dis_2 = std::abs(x1_2 - x1_1) + std::abs(y1_2 - y1_1); + float dis_3 = std::abs(x2_2 - x2_1) + std::abs(y2_2 - y2_1); + return dis + std::min(dis_2, dis_3); +} + +float Iou(std::vector &box1, std::vector &box2) { + int area1 = std::max(0, box1[2] - box1[0]) * std::max(0, box1[3] - box1[1]); + int area2 = std::max(0, box2[2] - box2[0]) * std::max(0, box2[3] - box2[1]); + + // computing the sum_area + int sum_area = area1 + area2; + + // find the each point of intersect rectangle + int x1 = std::max(box1[0], box2[0]); + int y1 = std::max(box1[1], box2[1]); + int x2 = std::min(box1[2], box2[2]); + int y2 = std::min(box1[3], box2[3]); + + // judge if there is an intersect + if (y1 >= y2 || x1 >= x2) { + return 0.0; + } else { + int intersect = (x2 - x1) * (y2 - y1); + return intersect / (sum_area - intersect + 0.00000001); + } +} + +bool ComparisonDis(const std::vector &dis1, + const std::vector &dis2) { + if (dis1[1] < dis2[1]) { + return true; + } else if (dis1[1] == dis2[1]) { + return dis1[0] < dis2[0]; + } else { + return false; + } +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.cc new file mode 100755 index 0000000000..7c1a55c17f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.cc @@ -0,0 +1,538 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ocr_postprocess_op.h" +#include "clipper.h" +#include + +namespace ultrainfer { +namespace vision { +namespace ocr { + +void PostProcessor::GetContourArea(const std::vector> &box, + float unclip_ratio, float &distance) { + int pts_num = 4; + float area = 0.0f; + float dist = 0.0f; + for (int i = 0; i < pts_num; i++) { + area += box[i][0] * box[(i + 1) % pts_num][1] - + box[i][1] * box[(i + 1) % pts_num][0]; + dist += sqrtf((box[i][0] - box[(i + 1) % pts_num][0]) * + (box[i][0] - box[(i + 1) % pts_num][0]) + + (box[i][1] - box[(i + 1) % pts_num][1]) * + (box[i][1] - box[(i + 1) % pts_num][1])); + } + area = fabs(float(area / 2.0)); + + distance = area * unclip_ratio / dist; +} + +void PostProcessor::GetContourAreaPoly(const std::vector &box, + float unclip_ratio, float &distance) { + int pts_num = box.size(); + float area = 0.0f; + float dist = 0.0f; + for (int i = 0; i < pts_num; i++) { + area += box[i].x * box[(i + 1) % pts_num].y - + box[i].y * box[(i + 1) % pts_num].x; + dist += sqrtf((box[i].x - box[(i + 1) % pts_num].x) * + (box[i].x - box[(i + 1) % pts_num].x) + + (box[i].y - box[(i + 1) % pts_num].y) * + (box[i].y - box[(i + 1) % pts_num].y)); + } + area = fabs(float(area / 2.0)); + + distance = area * unclip_ratio / dist; +} + +cv::RotatedRect PostProcessor::UnClip(std::vector> box, + const float &unclip_ratio) { + float distance = 1.0; + + GetContourArea(box, unclip_ratio, distance); + + ClipperLib::ClipperOffset offset; + ClipperLib::Path p; + p << ClipperLib::IntPoint(int(box[0][0]), int(box[0][1])) + << ClipperLib::IntPoint(int(box[1][0]), int(box[1][1])) + << ClipperLib::IntPoint(int(box[2][0]), int(box[2][1])) + << ClipperLib::IntPoint(int(box[3][0]), int(box[3][1])); + offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon); + + ClipperLib::Paths soln; + offset.Execute(soln, distance); + std::vector points; + + for (int j = 0; j < soln.size(); j++) { + for (int i = 0; i < soln[soln.size() - 1].size(); i++) { + points.emplace_back(soln[j][i].X, soln[j][i].Y); + } + } + cv::RotatedRect res; + if (points.size() <= 0) { + res = cv::RotatedRect(cv::Point2f(0, 0), cv::Size2f(1, 1), 0); + } else { + res = cv::minAreaRect(points); + } + return res; +} + +std::vector PostProcessor::UnClipPoly(std::vector box, + const float &unclip_ratio) { + float distance = 1.0; + + GetContourAreaPoly(box, unclip_ratio, distance); + + ClipperLib::ClipperOffset offset; + ClipperLib::Path p; + for (const auto &pt : box) { + p << ClipperLib::IntPoint(int(pt.x), int(pt.y)); + } + + offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon); + ClipperLib::Paths soln; + offset.Execute(soln, distance); + std::vector points; + + if (!soln.empty()) { + for (int i = 0; i < soln[0].size(); i++) { + points.emplace_back(soln[0][i].X, soln[0][i].Y); + } + } + + return points; +} + +float **PostProcessor::Mat2Vec(cv::Mat mat) { + auto **array = new float *[mat.rows]; + for (int i = 0; i < mat.rows; ++i) + array[i] = new float[mat.cols]; + for (int i = 0; i < mat.rows; ++i) { + for (int j = 0; j < mat.cols; ++j) { + array[i][j] = mat.at(i, j); + } + } + + return array; +} + +std::vector> +PostProcessor::OrderPointsClockwise(std::vector> pts) { + std::vector> box = pts; + std::sort(box.begin(), box.end(), XsortInt); + + std::vector> leftmost = {box[0], box[1]}; + std::vector> rightmost = {box[2], box[3]}; + + if (leftmost[0][1] > leftmost[1][1]) + std::swap(leftmost[0], leftmost[1]); + + if (rightmost[0][1] > rightmost[1][1]) + std::swap(rightmost[0], rightmost[1]); + + std::vector> 
rect = {leftmost[0], rightmost[0], rightmost[1], + leftmost[1]}; + return rect; +} + +std::vector> PostProcessor::Mat2Vector(cv::Mat mat) { + std::vector> img_vec; + std::vector tmp; + + for (int i = 0; i < mat.rows; ++i) { + tmp.clear(); + for (int j = 0; j < mat.cols; ++j) { + tmp.push_back(mat.at(i, j)); + } + img_vec.push_back(tmp); + } + return img_vec; +} + +bool PostProcessor::XsortFp32(std::vector a, std::vector b) { + if (a[0] != b[0]) + return a[0] < b[0]; + return false; +} + +bool PostProcessor::XsortInt(std::vector a, std::vector b) { + if (a[0] != b[0]) + return a[0] < b[0]; + return false; +} + +std::vector> PostProcessor::GetMiniBoxes(cv::RotatedRect box, + float &ssid) { + ssid = std::max(box.size.width, box.size.height); + + cv::Mat points; + cv::boxPoints(box, points); + + auto array = Mat2Vector(points); + std::sort(array.begin(), array.end(), XsortFp32); + + std::vector idx1 = array[0], idx2 = array[1], idx3 = array[2], + idx4 = array[3]; + if (array[3][1] <= array[2][1]) { + idx2 = array[3]; + idx3 = array[2]; + } else { + idx2 = array[2]; + idx3 = array[3]; + } + if (array[1][1] <= array[0][1]) { + idx1 = array[1]; + idx4 = array[0]; + } else { + idx1 = array[0]; + idx4 = array[1]; + } + + array[0] = idx1; + array[1] = idx2; + array[2] = idx3; + array[3] = idx4; + + return array; +} + +float PostProcessor::PolygonScoreAcc(std::vector contour, + cv::Mat pred) { + int width = pred.cols; + int height = pred.rows; + std::vector box_x; + std::vector box_y; + for (int i = 0; i < contour.size(); ++i) { + box_x.push_back(contour[i].x); + box_y.push_back(contour[i].y); + } + + int xmin = + clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0, + width - 1); + int xmax = + clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0, + width - 1); + int ymin = + clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0, + height - 1); + int ymax = + clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0, + height - 1); + + cv::Mat mask; + mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1); + + cv::Point *rook_point = new cv::Point[contour.size()]; + + for (int i = 0; i < contour.size(); ++i) { + rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin); + } + const cv::Point *ppt[1] = {rook_point}; + int npt[] = {int(contour.size())}; + + cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1)); + + cv::Mat croppedImg; + pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)) + .copyTo(croppedImg); + float score = cv::mean(croppedImg, mask)[0]; + + delete[] rook_point; + return score; +} + +float PostProcessor::BoxScoreFast(std::vector> box_array, + cv::Mat pred) { + auto array = box_array; + int width = pred.cols; + int height = pred.rows; + + float box_x[4] = {array[0][0], array[1][0], array[2][0], array[3][0]}; + float box_y[4] = {array[0][1], array[1][1], array[2][1], array[3][1]}; + + int xmin = clamp(int(std::floor(*(std::min_element(box_x, box_x + 4)))), 0, + width - 1); + int xmax = clamp(int(std::ceil(*(std::max_element(box_x, box_x + 4)))), 0, + width - 1); + int ymin = clamp(int(std::floor(*(std::min_element(box_y, box_y + 4)))), 0, + height - 1); + int ymax = clamp(int(std::ceil(*(std::max_element(box_y, box_y + 4)))), 0, + height - 1); + + cv::Mat mask; + mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1); + + cv::Point root_point[4]; + root_point[0] = cv::Point(int(array[0][0]) - xmin, int(array[0][1]) - ymin); + root_point[1] = cv::Point(int(array[1][0]) - xmin, 
int(array[1][1]) - ymin); + root_point[2] = cv::Point(int(array[2][0]) - xmin, int(array[2][1]) - ymin); + root_point[3] = cv::Point(int(array[3][0]) - xmin, int(array[3][1]) - ymin); + const cv::Point *ppt[1] = {root_point}; + int npt[] = {4}; + cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1)); + + cv::Mat croppedImg; + pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)) + .copyTo(croppedImg); + + auto score = cv::mean(croppedImg, mask)[0]; + return score; +} + +float PostProcessor::PolyScoreFast(std::vector box_array, + cv::Mat pred) { + int width = pred.cols; + int height = pred.rows; + + std::vector box_x; + std::vector box_y; + for (const cv::Point &p : box_array) { + box_x.push_back(p.x); + box_y.push_back(p.y); + } + + int xmin = + clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0, + width - 1); + int xmax = + clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0, + width - 1); + int ymin = + clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0, + height - 1); + int ymax = + clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0, + height - 1); + + cv::Mat mask; + mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1); + + std::vector shifted_box_array; + for (const cv::Point &p : box_array) { + shifted_box_array.emplace_back(p.x - xmin, p.y - ymin); + } + + std::vector> box_contour(1, shifted_box_array); + cv::fillPoly(mask, box_contour, cv::Scalar(1)); + + cv::Mat croppedImg; + pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)) + .copyTo(croppedImg); + + auto score = cv::mean(croppedImg, mask)[0]; + return score; +} + +std::vector>> PostProcessor::BoxesFromBitmap( + const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, + const float &det_db_unclip_ratio, const std::string &det_db_score_mode) { + const int min_size = 3; + const int max_candidates = 1000; + + int width = bitmap.cols; + int height = bitmap.rows; + + std::vector> contours; + std::vector hierarchy; + + cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST, + cv::CHAIN_APPROX_SIMPLE); + + int num_contours = + contours.size() >= max_candidates ? 
max_candidates : contours.size(); + + std::vector>> boxes; + + for (int _i = 0; _i < num_contours; _i++) { + if (contours[_i].size() <= 2) { + continue; + } + float ssid; + cv::RotatedRect box = cv::minAreaRect(contours[_i]); + auto array = GetMiniBoxes(box, ssid); + + auto box_for_unclip = array; + // end get_mini_box + + if (ssid < min_size) { + continue; + } + + float score; + if (det_db_score_mode == "slow") /* compute using polygon*/ + score = PolygonScoreAcc(contours[_i], pred); + else + score = BoxScoreFast(array, pred); + + if (score < box_thresh) + continue; + + // start for unclip + cv::RotatedRect points = UnClip(box_for_unclip, det_db_unclip_ratio); + if (points.size.height < 1.001 && points.size.width < 1.001) { + continue; + } + // end for unclip + + cv::RotatedRect clipbox = points; + auto cliparray = GetMiniBoxes(clipbox, ssid); + + if (ssid < min_size + 2) + continue; + + int dest_width = pred.cols; + int dest_height = pred.rows; + std::vector> intcliparray; + + for (int num_pt = 0; num_pt < 4; num_pt++) { + std::vector a{int(clampf(roundf(cliparray[num_pt][0] / float(width) * + float(dest_width)), + 0, float(dest_width))), + int(clampf(roundf(cliparray[num_pt][1] / + float(height) * float(dest_height)), + 0, float(dest_height)))}; + intcliparray.push_back(a); + } + boxes.push_back(intcliparray); + + } // end for + return boxes; +} + +std::vector>> PostProcessor::PolygonFromBitmap( + const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, + const float &det_db_unclip_ratio, const std::string &det_db_score_mode) { + const int min_size = 3; + const int max_candidates = 1000; + + int width = bitmap.cols; + int height = bitmap.rows; + + std::vector> contours; + std::vector hierarchy; + + cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST, + cv::CHAIN_APPROX_SIMPLE); + int num_contours = + contours.size() >= max_candidates ? 
max_candidates : contours.size(); + + std::vector>> boxes; + for (int _i = 0; _i < num_contours; _i++) { + + if (contours[_i].size() <= 2) { + continue; + } + double perimeter = cv::arcLength(contours[_i], true); + double epsilon = 0.002 * perimeter; + std::vector approx; + cv::approxPolyDP(contours[_i], approx, epsilon, true); + + if (approx.size() < 4) { + continue; // skip contours with fewer than 4 points + } + auto box_for_unclip = approx; + float score; + if (det_db_score_mode == "slow") + score = PolygonScoreAcc(approx, pred); + else + score = PolyScoreFast(approx, pred); + if (score < box_thresh) + continue; + // start for unclip + std::vector points = + UnClipPoly(box_for_unclip, det_db_unclip_ratio); + if (points.size() <= 0) + continue; + cv::RotatedRect res = cv::minAreaRect(points); + float ssid = std::max(res.size.width, res.size.height); + if (ssid < min_size + 2) + continue; + + int dest_width = pred.cols; + int dest_height = pred.rows; + std::vector> intcliparray; + + for (int num_pt = 0; num_pt < points.size(); num_pt++) { + std::vector a{int(clampf(roundf(points[num_pt].x / float(width) * + float(dest_width)), + 0, float(dest_width))), + int(clampf(roundf(points[num_pt].y / float(height) * + float(dest_height)), + 0, float(dest_height)))}; + intcliparray.push_back(a); + } + boxes.push_back(intcliparray); + } // end for + return boxes; +} + +std::vector>> PostProcessor::FilterCURVETagDetRes( + std::vector>> boxes, + const std::array &det_img_info) { + int oriimg_w = det_img_info[0]; + int oriimg_h = det_img_info[1]; + float ratio_w = float(det_img_info[2]) / float(oriimg_w); + float ratio_h = float(det_img_info[3]) / float(oriimg_h); + + std::vector>> root_points; + for (int n = 0; n < boxes.size(); n++) { + for (int m = 0; m < boxes[n].size(); m++) { + boxes[n][m][0] /= ratio_w; + boxes[n][m][1] /= ratio_h; + boxes[n][m][0] = int(_min(_max(boxes[n][m][0], 0), oriimg_w - 1)); + boxes[n][m][1] = int(_min(_max(boxes[n][m][1], 0), oriimg_h - 1)); + } + root_points.push_back(boxes[n]); + } + + return root_points; +} + +std::vector>> +PostProcessor::FilterTagDetRes(std::vector>> boxes, + const std::array &det_img_info) { + int oriimg_w = det_img_info[0]; + int oriimg_h = det_img_info[1]; + float ratio_w = float(det_img_info[2]) / float(oriimg_w); + float ratio_h = float(det_img_info[3]) / float(oriimg_h); + + std::vector>> root_points; + for (int n = 0; n < boxes.size(); n++) { + boxes[n] = OrderPointsClockwise(boxes[n]); + for (int m = 0; m < boxes[0].size(); m++) { + boxes[n][m][0] /= ratio_w; + boxes[n][m][1] /= ratio_h; + + boxes[n][m][0] = int(_min(_max(boxes[n][m][0], 0), oriimg_w - 1)); + boxes[n][m][1] = int(_min(_max(boxes[n][m][1], 0), oriimg_h - 1)); + } + } + + for (int n = 0; n < boxes.size(); n++) { + int rect_width, rect_height; + rect_width = int(sqrt(pow(boxes[n][0][0] - boxes[n][1][0], 2) + + pow(boxes[n][0][1] - boxes[n][1][1], 2))); + rect_height = int(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) + + pow(boxes[n][0][1] - boxes[n][3][1], 2))); + if (rect_width <= 4 || rect_height <= 4) + continue; + root_points.push_back(boxes[n]); + } + return root_points; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h new file mode 100755 index 0000000000..b8c471dff8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h @@ -0,0 +1,107 @@ +// Copyright (c) 2020 PaddlePaddle Authors.
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ultrainfer/vision/ocr/ppocr/utils/clipper.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +class PostProcessor { +public: + void GetContourArea(const std::vector> &box, + float unclip_ratio, float &distance); + void GetContourAreaPoly(const std::vector &box, float unclip_ratio, + float &distance); + cv::RotatedRect UnClip(std::vector> box, + const float &unclip_ratio); + + std::vector UnClipPoly(std::vector box, + const float &unclip_ratio); + + float **Mat2Vec(cv::Mat mat); + + std::vector> + OrderPointsClockwise(std::vector> pts); + + std::vector> GetMiniBoxes(cv::RotatedRect box, + float &ssid); + + float BoxScoreFast(std::vector> box_array, cv::Mat pred); + float PolyScoreFast(std::vector box_array, cv::Mat pred); + float PolygonScoreAcc(std::vector contour, cv::Mat pred); + + std::vector>> + BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, + const float &box_thresh, const float &det_db_unclip_ratio, + const std::string &det_db_score_mode); + + std::vector>> + PolygonFromBitmap(const cv::Mat pred, const cv::Mat bitmap, + const float &box_thresh, const float &det_db_unclip_ratio, + const std::string &det_db_score_mode); + + std::vector>> + FilterTagDetRes(std::vector>> boxes, + const std::array &det_img_info); + + std::vector>> + FilterCURVETagDetRes(std::vector>> boxes, + const std::array &det_img_info); + +private: + static bool XsortInt(std::vector a, std::vector b); + + static bool XsortFp32(std::vector a, std::vector b); + + std::vector> Mat2Vector(cv::Mat mat); + + inline int _max(int a, int b) { return a >= b ? a : b; } + + inline int _min(int a, int b) { return a >= b ? b : a; } + + template inline T clamp(T x, T min, T max) { + if (x > max) + return max; + if (x < min) + return min; + return x; + } + + inline float clampf(float x, float min, float max) { + if (x > max) + return max; + if (x < min) + return min; + return x; + } +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h new file mode 100755 index 0000000000..26c84a3cd8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/core/fd_tensor.h" +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/common/result.h" +#include +#include + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +ULTRAINFER_DECL cv::Mat GetRotateCropImage(const cv::Mat &srcimage, + const std::array &box); + +ULTRAINFER_DECL void SortBoxes(std::vector> *boxes); + +ULTRAINFER_DECL std::vector ArgSort(const std::vector &array); + +ULTRAINFER_DECL std::vector Softmax(std::vector &src); + +ULTRAINFER_DECL std::vector Xyxyxyxy2Xyxy(std::array &box); + +ULTRAINFER_DECL float Dis(std::vector &box1, std::vector &box2); + +ULTRAINFER_DECL float Iou(std::vector &box1, std::vector &box2); + +ULTRAINFER_DECL bool ComparisonDis(const std::vector &dis1, + const std::vector &dis2); +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/softmax.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/softmax.cc new file mode 100755 index 0000000000..4d33e4678e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/softmax.cc @@ -0,0 +1,51 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +static inline float FastExp(float x) { + union { + uint32_t i; + float f; + } v{}; + v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); + return v.f; +} + +std::vector Softmax(std::vector &src) { + int length = src.size(); + std::vector dst; + dst.resize(length); + const float alpha = + static_cast(*std::max_element(&src[0], &src[0 + length])); + float denominator{0}; + + for (int i = 0; i < length; ++i) { + dst[i] = FastExp(src[i] - alpha); + denominator += dst[i]; + } + + for (int i = 0; i < length; ++i) { + dst[i] /= denominator; + } + return dst; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/sorted_boxes.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/sorted_boxes.cc new file mode 100755 index 0000000000..075ce672b6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/utils/sorted_boxes.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +bool CompareBox(const std::array &result1, + const std::array &result2) { + if (result1[1] < result2[1]) { + return true; + } else if (result1[1] == result2[1]) { + return result1[0] < result2[0]; + } else { + return false; + } +} + +void SortBoxes(std::vector> *boxes) { + std::sort(boxes->begin(), boxes->end(), CompareBox); + + if (boxes->size() == 0) { + return; + } + + for (int i = 0; i < boxes->size() - 1; i++) { + for (int j = i; j >= 0; j--) { + if (std::abs((*boxes)[j + 1][1] - (*boxes)[j][1]) < 10 && + ((*boxes)[j + 1][0] < (*boxes)[j][0])) { + std::swap((*boxes)[i], (*boxes)[i + 1]); + } + } + } +} + +std::vector ArgSort(const std::vector &array) { + const int array_len(array.size()); + std::vector array_index(array_len, 0); + for (int i = 0; i < array_len; ++i) + array_index[i] = i; + + std::sort( + array_index.begin(), array_index.end(), + [&array](int pos1, int pos2) { return (array[pos1] < array[pos2]); }); + + return array_index; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.cc new file mode 100755 index 0000000000..f8f5466f3a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
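[Editor's note, not part of the patch] The UVDoc postprocessor that follows currently passes the inference tensors through unchanged; the conversion back to an image (the SingleBatchPostprocessor sketched in comments below) is left to the caller. Purely as an illustration of that final step, and under the assumption that the output is a float HWC tensor already holding pixel values in roughly [0, 255] (the patch itself does not guarantee this layout or range), the wrap-and-clamp could look like the following; the helper name FloatTensorToImage is hypothetical.

// Sketch only: wrap a float HWC tensor as an OpenCV image and saturate it
// into 8-bit BGR. Tensor layout and value range are assumptions.
#include "opencv2/core.hpp"

static cv::Mat FloatTensorToImage(const float *data, int rows, int cols) {
  cv::Mat float_img(rows, cols, CV_32FC3, const_cast<float *>(data));
  cv::Mat img_u8;
  float_img.convertTo(img_u8, CV_8UC3); // saturate_cast clamps to [0, 255]
  return img_u8;
}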
+ +#include "ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +// bool UVDocPostprocessor::SingleBatchPostprocessor(const float* out_data, +// cv::Mat* result) { +// // Reverse normalization +// std::vector mean{127.5f, 127.5f, 127.5f}; +// std::vector std{127.5f, 127.5f, 127.5f}; +// Mat result_mat = Mat::Create(result->rows, result->cols, 3, +// FDDataType::FP32, const_cast(out_data)); +// Convert::Run(&result_mat, mean, std); + +// // Convert result_mat to OpenCV Mat object +// auto temp = result_mat.GetOpenCVMat(); +// cv::Mat res = cv::Mat::zeros(temp->size(), CV_8UC3); +// temp->convertTo(res, CV_8UC3, 1); + +// // Execute BGR2RGB conversion +// Mat fd_image = WrapMat(res); +// BGR2RGB::Run(&fd_image); +// res = *(fd_image.GetOpenCVMat()); + +// // Copy result to output +// res.copyTo(*result); + +// return true; +// } + +bool UVDocPostprocessor::Run(const std::vector &infer_results, + std::vector *results) { + *results = infer_results; + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h new file mode 100755 index 0000000000..2c07e50f9c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h @@ -0,0 +1,40 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/function/functions.h" +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Postprocessor object for UVDoc serials model. + */ +class ULTRAINFER_DECL UVDocPostprocessor { +public: + UVDocPostprocessor() {} + /** \brief Process the result of runtime and fill to UVDocResult + * + * \param[in] tensors The inference result from runtime + * \param[in] results The output text results of UVDoc + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results); +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.cc new file mode 100755 index 0000000000..7612b5bbe3 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +UVDocPreprocessor::UVDocPreprocessor() { + normalize_permute_op_ = std::make_shared( + std::vector({0.0f, 0.0f, 0.0f}), + std::vector({1.0f, 1.0f, 1.0f}), true); +} + +bool UVDocPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + + if (!disable_normalize_ && !disable_permute_) { + (*normalize_permute_op_)(image_batch); + } + + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h new file mode 100755 index 0000000000..c10bbd5bd9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/normalize_and_permute.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace ocr { +/*! @brief Preprocessor object for UVDoc serials model. + */ +class ULTRAINFER_DECL UVDocPreprocessor : public ProcessorManager { +public: + UVDocPreprocessor(); + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * wrapped by FDMat. \param[in] output The output tensors which will feed in + * runtime \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Set preprocess normalize parameters, please call this API to customize + /// the normalize parameters, otherwise it will use the default normalize + /// parameters. + void SetNormalize(const std::vector &mean, + const std::vector &std, bool is_scale) { + normalize_permute_op_ = + std::make_shared(mean, std, is_scale); + } + /// This function will disable normalize in preprocessing step. + void DisableNormalize() { disable_permute_ = true; } + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute() { disable_normalize_ = true; } + +private: + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + std::shared_ptr normalize_permute_op_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.cc b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.cc new file mode 100755 index 0000000000..0c563165ce --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/ocr/ppocr/uvdocwarpper.h" +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace ultrainfer { +namespace vision { +namespace ocr { + +UVDocWarpper::UVDocWarpper() {} +UVDocWarpper::UVDocWarpper(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +// Init +bool UVDocWarpper::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +std::unique_ptr UVDocWarpper::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(UVDocWarpper(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool UVDocWarpper::Predict(cv::Mat *im, FDTensor *result) { + return Predict(*im, result); +} + +bool UVDocWarpper::Predict(const cv::Mat &img, FDTensor *result) { + std::vector results; + if (!BatchPredict({img}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool UVDocWarpper::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." 
<< std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.h b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.h new file mode 100755 index 0000000000..28fa279c44 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/ocr/ppocr/uvdocwarpper.h @@ -0,0 +1,104 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/utils/unique_ptr.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/ocr/ppocr/utils/ocr_postprocess_op.h" +#include "ultrainfer/vision/ocr/ppocr/uvdoc_postprocessor.h" +#include "ultrainfer/vision/ocr/ppocr/uvdoc_preprocessor.h" + +namespace ultrainfer { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { + +/*! @brief UVDocWarpper object is used to load the detection model provided by + * PaddleOCR. + */ +class ULTRAINFER_DECL UVDocWarpper : public UltraInferModel { +public: + UVDocWarpper(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g + * ./ch_PP-OCRv3_det_infer/model.pdmodel. \param[in] params_file Path of + * parameter file, e.g ./ch_PP-OCRv3_det_infer/model.pdiparams, if the model + * format is ONNX, this parameter will be ignored. \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends`. \param[in] model_format Model + * format of the loaded model, default is Paddle format. + */ + UVDocWarpper(const std::string &model_file, + const std::string ¶ms_file = "", + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new UVDocWarpper with less memory usage when multiple + * instances of the same model are created + * + * \return new UVDocWarpper* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "pp-uvdoc"; } + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] result The output of OCR + * detection model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + + virtual bool Predict(cv::Mat *img, FDTensor *result); + + virtual bool Predict(const cv::Mat &img, FDTensor *result); + + /** \brief Predict the input image and get OCR detection model result. 
+ * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format. \param[in] result The output of OCR + * detection model result will be writen to this structure. \return true if + * the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector &images, + std::vector *results); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is + * a 3-D array with layout HWC, BGR format. \param[in] results The output of + * OCR detection model result will be writen to this structure. \return true + * if the prediction is successed, otherwise false. + */ + /// Get preprocessor reference of UVDocWarpperPreprocessor + virtual UVDocPreprocessor &GetPreprocessor() { return preprocessor_; } + + // / Get postprocessor reference of UVDocWarpperPostprocessor + virtual UVDocPostprocessor &GetPostprocessor() { return postprocessor_; } + +private: + bool Initialize(); + UVDocPreprocessor preprocessor_; + UVDocPostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.cc new file mode 100755 index 0000000000..9e560d0fbd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.cc @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/caddn/caddn.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +Caddn::Caddn(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool Caddn::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool Caddn::Predict(const cv::Mat &im, std::vector &input_cam_data, + std::vector &input_lidar_data, + PerceptionResult *result) { + std::vector results; + if (!BatchPredict({im}, input_cam_data, input_lidar_data, &results)) { + return false; + } + if (results.size()) { + *result = std::move(results[0]); + } + return true; +} + +bool Caddn::BatchPredict(const std::vector &images, + std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *results) { + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, input_cam_data, input_lidar_data, + &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = "images"; + reused_input_tensors_[1].name = "trans_cam_to_img"; + reused_input_tensors_[2].name = "trans_lidar_to_cam"; + + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.h new file mode 100755 index 0000000000..75bf8c8d9c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h" +#include "ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace perception { +/*! @brief Caddn model object used when to load a Caddn model exported by Caddn. + */ +class ULTRAINFER_DECL Caddn : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g Caddn/model.pdiparams + * \param[in] params_file Path of parameter file, e.g Caddn/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + Caddn(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "Paddle3D/Caddn"; } + + /** \brief Predict the perception result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output perception + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, std::vector &input_cam_data, + std::vector &input_lidar_data, + PerceptionResult *results); + + /** \brief Predict the perception results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output perception result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &images, + std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *results); + + /// Get preprocessor reference of Caddn + virtual CaddnPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of Caddn + virtual CaddnPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + CaddnPreprocessor preprocessor_; + CaddnPostprocessor postprocessor_; + bool initialized_ = false; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn_pybind.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn_pybind.cc new file mode 100755 index 0000000000..cd86c4e67b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/caddn_pybind.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
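A minimal C++ usage sketch of the Caddn API declared above (the model and config paths are placeholders, and the camera and lidar matrices are flattened row-major float vectors of 12 and 16 values, as consumed by CaddnPreprocessor):

#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/perception/paddle3d/caddn/caddn.h"

int main() {
  namespace perception = ultrainfer::vision::perception;
  // Placeholder paths for an exported Caddn model and its deploy config.
  perception::Caddn model("caddn/model.pdmodel", "caddn/model.pdiparams",
                          "caddn/infer_cfg.yml");
  cv::Mat image = cv::imread("demo.png");
  std::vector<float> cam_to_img(12, 0.0f);    // flattened 3x4 camera matrix
  std::vector<float> lidar_to_cam(16, 0.0f);  // flattened 4x4 lidar-to-camera matrix
  ultrainfer::vision::PerceptionResult result;
  if (!model.Predict(image, cam_to_img, lidar_to_cam, &result)) {
    std::cerr << "Caddn prediction failed." << std::endl;
    return -1;
  }
  std::cout << "Detected " << result.scores.size() << " objects." << std::endl;
  return 0;
}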
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindCaddn(pybind11::module &m) { + pybind11::class_(m, "CaddnPreprocessor") + .def(pybind11::init()) + .def("run", + [](vision::perception::CaddnPreprocessor &self, + std::vector &im_list, + std::vector &cam_data, std::vector &lidar_data) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, cam_data, lidar_data, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in CaddnPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }); + + pybind11::class_(m, + "CaddnPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::perception::CaddnPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "CaddnPostprocessor."); + } + return results; + }) + .def("run", [](vision::perception::CaddnPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "CaddnPostprocessor."); + } + return results; + }); + + pybind11::class_(m, "Caddn") + .def(pybind11::init()) + .def("predict", + [](vision::perception::Caddn &self, pybind11::array &data, + std::vector &cam_data, std::vector &lidar_data) { + auto mat = PyArrayToCvMat(data); + vision::PerceptionResult res; + self.Predict(mat, cam_data, lidar_data, &res); + return res; + }) + .def("batch_predict", + [](vision::perception::Caddn &self, + std::vector &data, std::vector &cam_data, + std::vector &lidar_data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, cam_data, lidar_data, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::perception::Caddn::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::perception::Caddn::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.cc new file mode 100755 index 0000000000..e4389b7046 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.cc @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h" + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +CaddnPostprocessor::CaddnPostprocessor() {} + +bool CaddnPostprocessor::Run(const std::vector &tensors, + std::vector *results) { + results->resize(1); + (*results)[0].Clear(); + (*results)[0].Reserve(tensors[0].shape[0]); + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + const float *data_0 = reinterpret_cast(tensors[0].Data()); + auto result = &(*results)[0]; + for (int i = 0; i < tensors[0].shape[0] * tensors[0].shape[1]; i += 7) { + // item 1 ~ 3 : box3d bottom center x, y, z + // item 4 ~ 6 : box3d w, h, l + // item 7 : box3d yaw angle + std::vector vec(data_0 + i, data_0 + i + 7); + result->boxes.emplace_back( + std::array{0, 0, 0, 0, vec[3], vec[4], vec[5]}); + result->center.emplace_back(std::array{vec[0], vec[1], vec[2]}); + result->yaw_angle.push_back(vec[6]); + } + const float *data_1 = reinterpret_cast(tensors[2].Data()); + for (int i = 0; i < tensors[2].shape[0]; i += 1) { + std::vector vec(data_1 + i, data_1 + i + 1); + result->scores.push_back(vec[0]); + } + const float *data_2 = reinterpret_cast(tensors[1].Data()); + for (int i = 0; i < tensors[1].shape[0]; i++) { + std::vector vec(data_2 + i, data_2 + i + 1); + result->label_ids.push_back(vec[0]); + } + + result->valid.push_back(true); // 0 scores + result->valid.push_back(true); // 1 label_ids + result->valid.push_back(true); // 2 boxes + result->valid.push_back(true); // 3 center + result->valid.push_back(false); // 4 observation_angle + result->valid.push_back(true); // 5 yaw_angle + result->valid.push_back(false); // 6 velocity + + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h new file mode 100755 index 0000000000..35a554be2b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/postprocessor.h @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Postprocessor object for Caddn serials model. 
+ */ +class ULTRAINFER_DECL CaddnPostprocessor { +public: + /** \brief Create a postprocessor instance for Caddn serials model + */ + CaddnPostprocessor(); + + /** \brief Process the result of runtime and fill to PerceptionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results); + +protected: + float conf_threshold_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.cc new file mode 100755 index 0000000000..ffba6c02ad --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +CaddnPreprocessor::CaddnPreprocessor(const std::string &config_file) { + config_file_ = config_file; + FDASSERT(BuildPreprocessPipeline(), + "Failed to create Paddle3DDetPreprocessor."); + initialized_ = true; +} + +bool CaddnPreprocessor::BuildPreprocessPipeline() { + processors_.clear(); + + // preprocess + processors_.push_back(std::make_shared()); + + std::vector alpha = {1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0}; + std::vector beta = {0.0, 0.0, 0.0}; + processors_.push_back(std::make_shared(alpha, beta)); + + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + + // Fusion will improve performance + FuseTransforms(&processors_); + + return true; +} + +bool CaddnPreprocessor::Apply(FDMatBatch *image_batch, + std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *outputs) { + if (image_batch->mats->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." 
<< std::endl; + return false; + } + // There are 3 outputs, image, cam_data, lidar_data + outputs->resize(3); + int batch = static_cast(image_batch->mats->size()); + + // Allocate memory for cam_data + (*outputs)[1].Resize({batch, 3, 4}, FDDataType::FP32); + + // Allocate memory for lidar_data + (*outputs)[2].Resize({batch, 4, 4}, FDDataType::FP32); + + auto *cam_data_ptr = reinterpret_cast((*outputs)[1].MutableData()); + auto *lidar_data_ptr = reinterpret_cast((*outputs)[2].MutableData()); + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(mat)) { + FDERROR << "Failed to processs image:" << i << " in " + << processors_[j]->Name() << "." << std::endl; + return false; + } + } + + memcpy(cam_data_ptr + i * 12, input_cam_data.data(), 12 * sizeof(float)); + memcpy(lidar_data_ptr + i * 16, input_lidar_data.data(), + 16 * sizeof(float)); + } + + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + + return true; +} + +bool CaddnPreprocessor::Run(std::vector *images, + std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *outputs) { + FDMatBatch image_batch(images); + PreApply(&image_batch); + bool ret = Apply(&image_batch, input_cam_data, input_lidar_data, outputs); + PostApply(); + return ret; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h new file mode 100755 index 0000000000..769df72960 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/caddn/preprocessor.h @@ -0,0 +1,69 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Preprocessor object for Caddn serials model. 
+ */ +class ULTRAINFER_DECL CaddnPreprocessor : public ProcessorManager { +public: + CaddnPreprocessor() = default; + /** \brief Create a preprocessor instance for Caddn model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * Caddn/infer_cfg.yml + */ + explicit CaddnPreprocessor(const std::string &config_file); + + bool Run(std::vector *images, std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *outputs); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Apply(FDMatBatch *image_batch, std::vector *outputs) { + FDERROR << "CaddnPreprocessor should input cam and lidar datas" + << std::endl; + return 0; + }; + bool Apply(FDMatBatch *image_batch, std::vector &input_cam_data, + std::vector &input_lidar_data, + std::vector *outputs); + +protected: + bool BuildPreprocessPipeline(); + std::vector> processors_; + + bool disable_permute_ = false; + + bool initialized_ = false; + + std::string config_file_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.cc new file mode 100755 index 0000000000..1ff09e56d4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +Centerpoint::Centerpoint(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool Centerpoint::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool Centerpoint::Predict(const std::string point_dir, + PerceptionResult *result) { + std::vector results; + if (!BatchPredict({point_dir}, &results)) { + return false; + } + + if (results.size()) { + *result = std::move(results[0]); + } + return true; +} + +bool Centerpoint::BatchPredict(std::vector points_dir, + std::vector *results) { + int64_t num_point_dim = 5; + int with_timelag = 0; + if (!preprocessor_.Run(points_dir, num_point_dim, with_timelag, + reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + results->resize(reused_input_tensors_.size()); + for (int index = 0; index < reused_input_tensors_.size(); ++index) { + std::vector input_tensor; + input_tensor.push_back(reused_input_tensors_[index]); + + input_tensor[0].name = InputInfoOfRuntime(0).name; + + if (!Infer(input_tensor, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + (*results)[index].Clear(); + (*results)[index].Reserve(reused_output_tensors_[0].shape[0]); + if (!postprocessor_.Run(reused_output_tensors_, &((*results)[index]))) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + } + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h new file mode 100755 index 0000000000..67d40cb9ec --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h" +#include "ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace perception { +/*! @brief Centerpoint model object used when to load a Centerpoint model + * exported by Centerpoint. + */ +class ULTRAINFER_DECL Centerpoint : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. 
+ * + * \param[in] model_file Path of model file, e.g Centerpoint/model.pdiparams + * \param[in] params_file Path of parameter file, e.g + * Centerpoint/model.pdiparams, if the model format is ONNX, this parameter + * will be ignored \param[in] custom_option RuntimeOption for inference, the + * default will use cpu, and choose the backend defined in + * "valid_cpu_backends" \param[in] model_format Model format of the loaded + * model, default is Paddle format + */ + Centerpoint(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "Paddle3D/Centerpoint"; } + + /** \brief Predict the perception result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output perception + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(std::string point_dir, PerceptionResult *result); + + /** \brief Predict the perception results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output perception result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(std::vector points_dir, + std::vector *results); + + /// Get preprocessor reference of Centerpoint + virtual CenterpointPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of Centerpoint + virtual CenterpointPostprocessor &GetPostprocessor() { + return postprocessor_; + } + +protected: + bool Initialize(); + CenterpointPreprocessor preprocessor_; + CenterpointPostprocessor postprocessor_; + bool initialized_ = false; + std::vector> ouput_tensors; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint_pybind.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint_pybind.cc new file mode 100755 index 0000000000..89b9d79885 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint_pybind.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
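A minimal sketch of calling the Centerpoint API declared above; the .bin paths are placeholders, and each file is assumed to hold raw float32 points with five values per point, as CenterpointPreprocessor::ReadPoint expects.

#include <iostream>
#include <string>
#include <vector>
#include "ultrainfer/vision/perception/paddle3d/centerpoint/centerpoint.h"

int main() {
  namespace perception = ultrainfer::vision::perception;
  perception::Centerpoint model("centerpoint/model.pdmodel",
                                "centerpoint/model.pdiparams",
                                "centerpoint/infer_cfg.yml");
  // Placeholder point-cloud files: raw float32, 5 values per point.
  std::vector<std::string> point_files = {"frame_0000.bin", "frame_0001.bin"};
  std::vector<ultrainfer::vision::PerceptionResult> results;
  if (!model.BatchPredict(point_files, &results)) {
    std::cerr << "Centerpoint prediction failed." << std::endl;
    return -1;
  }
  std::cout << results.size() << " frames processed." << std::endl;  // one result per file
  return 0;
}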
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindCenterpoint(pybind11::module &m) { + pybind11::class_(m, "CenterpointPreprocessor") + .def(pybind11::init()) + .def("run", [](vision::perception::CenterpointPreprocessor &self, + std::vector points_dir, + const int64_t num_point_dim, const int with_timelag) { + std::vector outputs; + if (!self.Run(points_dir, num_point_dim, with_timelag, outputs)) { + throw std::runtime_error("Failed to preprocess the input data in " + "CenterpointPreprocessor."); + } + + return outputs; + }); + + pybind11::class_( + m, "Centerpoint") + .def(pybind11::init()) + .def("predict", + [](vision::perception::Centerpoint &self, std::string point_dir) { + vision::PerceptionResult result; + self.Predict(point_dir, &result); + return result; + }) + .def("batch_predict", + [](vision::perception::Centerpoint &self, + std::vector &points_dir) { + std::vector results; + self.BatchPredict(points_dir, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::perception::Centerpoint::GetPreprocessor) + .def_property_readonly( + "postprocessor", &vision::perception::Centerpoint::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.cc new file mode 100755 index 0000000000..ff822c0867 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.cc @@ -0,0 +1,71 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h" + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +CenterpointPostprocessor::CenterpointPostprocessor() {} + +bool CenterpointPostprocessor::Run(const std::vector &tensors, + PerceptionResult *result) { + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + + const float *data_0 = reinterpret_cast(tensors[0].Data()); + for (int i = 0; i < tensors[0].shape[0] * tensors[0].shape[1]; i += 9) { + // item 1 ~ 3 : box3d bottom center x, y, z + // item 4 ~ 6 : box3d w, l, h + // item 7 ~ 8 : speed x,y + // item 9 : box3d yaw angle + std::vector vec(data_0 + i, data_0 + i + 9); + result->boxes.emplace_back( + std::array{0, 0, 0, 0, vec[3], vec[4], vec[5]}); + result->center.emplace_back(std::array{vec[0], vec[1], vec[2]}); + result->yaw_angle.push_back(vec[8]); + result->velocity.push_back(std::array{vec[6], vec[7]}); + } + + const float *data_1 = reinterpret_cast(tensors[2].Data()); + for (int i = 0; i < tensors[1].shape[0]; i += 1) { + std::vector vec(data_1 + i, data_1 + i + 1); + result->scores.push_back(vec[0]); + } + + const long long *data_2 = + reinterpret_cast(tensors[1].Data()); + for (int i = 0; i < tensors[2].shape[0]; i++) { + std::vector vec(data_2 + i, data_2 + i + 1); + result->label_ids.push_back(vec[0]); + } + result->valid.push_back(true); // 0 scores + result->valid.push_back(true); // 1 label_ids + result->valid.push_back(true); // 2 boxes + result->valid.push_back(true); // 3 center + result->valid.push_back(false); // 4 observation_angle + result->valid.push_back(true); // 5 yaw_angle + result->valid.push_back(true); // 6 velocity + + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h new file mode 100755 index 0000000000..6e03edaf4a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/postprocessor.h @@ -0,0 +1,47 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Postprocessor object for Centerpoint serials model. 
+ */ +class ULTRAINFER_DECL CenterpointPostprocessor { +public: + /** \brief Create a postprocessor instance for Centerpoint serials model + */ + CenterpointPostprocessor(); + + /** \brief Process the result of runtime and fill to PerceptionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, PerceptionResult *results); + +protected: + float conf_threshold_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.cc new file mode 100755 index 0000000000..90a8fa81ec --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.cc @@ -0,0 +1,105 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +CenterpointPreprocessor::CenterpointPreprocessor( + const std::string &config_file) { + initialized_ = true; +} + +bool CenterpointPreprocessor::ReadPoint(const std::string &file_path, + const int64_t num_point_dim, + std::vector &data, + int64_t *num_points) { + std::ifstream file_in(file_path, std::ios::in | std::ios::binary); + if (num_point_dim < 4) { + FDERROR << "Point dimension must not be less than 4, but received " + << "num_point_dim is " << num_point_dim << std::endl; + } + + if (!file_in) { + FDERROR << "Failed to read file: " << file_path << std::endl; + return false; + } + + std::streampos file_size; + file_in.seekg(0, std::ios::end); + file_size = file_in.tellg(); + file_in.seekg(0, std::ios::beg); + + data.resize(file_size / sizeof(float)); + file_in.read(reinterpret_cast(data.data()), file_size); + file_in.close(); + + if (file_size / sizeof(float) % num_point_dim != 0) { + FDERROR << "Loaded file size (" << file_size + << ") is not evenly divisible by num_point_dim (" << num_point_dim + << ")\n"; + return false; + } + *num_points = file_size / sizeof(float) / num_point_dim; + return true; +} + +bool CenterpointPreprocessor::InsertTimeToPoints(const int64_t num_points, + const int64_t num_point_dim, + float *points) { + for (int64_t i = 0; i < num_points; ++i) { + *(points + i * num_point_dim + 4) = 0.; + } + return true; +} + +bool CenterpointPreprocessor::Apply(std::vector &points_dir, + const int64_t num_point_dim, + const int with_timelag, + std::vector &outputs) { + for (int index = 0; index < points_dir.size(); ++index) { + std::string file_path = points_dir[index]; + std::vector points_shape; + std::vector data; + int64_t num_points; + if 
(!ReadPoint(file_path, num_point_dim, data, &num_points)) { + return false; + } + float *points = data.data(); + + if (!with_timelag && num_point_dim == 5 || num_point_dim > 5) { + InsertTimeToPoints(num_points, num_point_dim, points); + } + points_shape.push_back(num_points); + points_shape.push_back(num_point_dim); + + FDTensor tensor; + tensor.SetData(points_shape, FDDataType::FP32, points, true); + outputs.push_back(tensor); + } + return true; +} + +bool CenterpointPreprocessor::Run(std::vector &points_dir, + const int64_t num_point_dim, + const int with_timelag, + std::vector &outputs) { + bool ret = Apply(points_dir, num_point_dim, with_timelag, outputs); + return ret; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h new file mode 100755 index 0000000000..4685f2c9c7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/centerpoint/preprocessor.h @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Preprocessor object for Centerpoint model. + */ +class ULTRAINFER_DECL CenterpointPreprocessor : public ProcessorManager { +public: + CenterpointPreprocessor() = default; + /** \brief Create a preprocessor instance for Centerpoint model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * Centerpoint/infer_cfg.yml + */ + explicit CenterpointPreprocessor(const std::string &config_file); + + bool Apply(FDMatBatch *image_batch, std::vector *outputs) { + return false; + } + + bool Apply(std::vector &points_dir, const int64_t num_point_dim, + const int with_timelag, std::vector &outputs); + + bool Run(std::vector &points_dir, const int64_t num_point_dim, + const int with_timelag, std::vector &outputs); + +protected: + std::vector> processors_; + bool ReadPoint(const std::string &file_path, const int64_t num_point_dim, + std::vector &data, int64_t *num_points); + bool InsertTimeToPoints(const int64_t num_points, const int64_t num_point_dim, + float *points); + bool initialized_ = false; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.cc new file mode 100755 index 0000000000..e1ecebd860 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/petr/petr.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +Petr::Petr(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + runtime_option.paddle_infer_option.enable_mkldnn = false; + initialized = Initialize(); +} + +bool Petr::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool Petr::Predict(const cv::Mat &images, PerceptionResult *results) { + FDERROR << "Petr inference only support 6(V1) or 12(V2) images" << std::endl; + return false; +} + +bool Petr::BatchPredict(const std::vector &images, + std::vector *results) { + if ((images.size() != 6) && (images.size() != 12)) { + FDERROR << "Petr only support 6(V1) or 12(V2) images"; + return false; + } + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + // Note: un-commented the codes below to show the debug info. + // reused_input_tensors_[0].PrintInfo(); + // reused_input_tensors_[1].PrintInfo(); + // reused_input_tensors_[2].PrintInfo(); + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[1].name = InputInfoOfRuntime(1).name; + if (images.size() == 12) { + // for Petr V2 timestamp + reused_input_tensors_[2].name = InputInfoOfRuntime(2).name; + } else { + // for Petr V1 + reused_input_tensors_.pop_back(); + } + + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.h new file mode 100755 index 0000000000..90c5e78e1b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/perception/paddle3d/petr/postprocessor.h" +#include "ultrainfer/vision/perception/paddle3d/petr/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace perception { +/*! @brief petr model object used when to load a petr model exported by petr. + */ +class ULTRAINFER_DECL Petr : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g petr/model.pdiparams + * \param[in] params_file Path of parameter file, e.g petr/model.pdiparams, if + * the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + Petr(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "Paddle3D/petr"; } + + /** \brief Predict the perception result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output perception + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, PerceptionResult *result); + + /** \brief Predict the perception results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output perception result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of Petr + virtual PetrPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of Petr + virtual PetrPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + PetrPreprocessor preprocessor_; + PetrPostprocessor postprocessor_; + bool initialized_ = false; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr_pybind.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr_pybind.cc new file mode 100755 index 0000000000..24f2ab49fc --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/petr_pybind.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPetr(pybind11::module &m) { + pybind11::class_(m, "PetrPreprocessor") + .def(pybind11::init()) + .def("run", [](vision::perception::PetrPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in PetrPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }); + + pybind11::class_(m, + "PetrPostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::perception::PetrPostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PetrPostprocessor."); + } + return results; + }) + .def("run", [](vision::perception::PetrPostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PetrPostprocessor."); + } + return results; + }); + + pybind11::class_(m, "Petr") + .def(pybind11::init()) + .def("predict", + [](vision::perception::Petr &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::PerceptionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::perception::Petr &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::perception::Petr::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::perception::Petr::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.cc new file mode 100755 index 0000000000..e82d5c697e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.cc @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
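A minimal sketch of driving the Petr model from C++ using the API declared above; the paths are placeholders, and BatchPredict accepts exactly 6 images for PETR v1 or 12 for PETR v2, as enforced in petr.cc.

#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/perception/paddle3d/petr/petr.h"

int main() {
  namespace perception = ultrainfer::vision::perception;
  perception::Petr model("petr/model.pdmodel", "petr/model.pdiparams",
                         "petr/infer_cfg.yml");
  // PETR v1 expects exactly 6 surround-view camera images (v2 expects 12).
  std::vector<cv::Mat> cameras;
  for (int i = 0; i < 6; ++i) {
    cameras.push_back(cv::imread("cam_" + std::to_string(i) + ".jpg"));
  }
  std::vector<ultrainfer::vision::PerceptionResult> results;
  if (!model.BatchPredict(cameras, &results)) {
    std::cerr << "Petr prediction failed." << std::endl;
    return -1;
  }
  return 0;
}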
+ +#include "ultrainfer/vision/perception/paddle3d/petr/postprocessor.h" + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +PetrPostprocessor::PetrPostprocessor() {} + +bool PetrPostprocessor::Run(const std::vector &tensors, + std::vector *results) { + results->resize(1); + (*results)[0].Clear(); + (*results)[0].Reserve(tensors[0].shape[0]); + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." << std::endl; + return false; + } + const float *data_0 = reinterpret_cast(tensors[0].Data()); + auto result = &(*results)[0]; + for (int i = 0; i < tensors[0].shape[0] * tensors[0].shape[1]; i += 9) { + // item 1 ~ 3 : box3d w, h, l + // item 4 ~ 6 : box3d bottom center x, y, z + // item 7 : box3d yaw angle + // item 8 ~ 9 : speed x,y + std::vector vec(data_0 + i, data_0 + i + 9); + result->boxes.emplace_back( + std::array{0, 0, 0, 0, vec[0], vec[1], vec[2]}); + result->center.emplace_back(std::array{vec[3], vec[4], vec[5]}); + result->yaw_angle.push_back(vec[6]); + result->velocity.push_back(std::array{vec[7], vec[8]}); + } + const float *data_1 = reinterpret_cast(tensors[1].Data()); + for (int i = 0; i < tensors[1].shape[0]; i += 1) { + std::vector vec(data_1 + i, data_1 + i + 1); + result->scores.push_back(vec[0]); + } + const long long *data_2 = + reinterpret_cast(tensors[2].Data()); + for (int i = 0; i < tensors[2].shape[0]; i++) { + std::vector vec(data_2 + i, data_2 + i + 1); + result->label_ids.push_back(vec[0]); + } + + result->valid.push_back(true); // 0 scores + result->valid.push_back(true); // 1 label_ids + result->valid.push_back(true); // 2 boxes + result->valid.push_back(true); // 3 center + result->valid.push_back(false); // 4 observation_angle + result->valid.push_back(true); // 5 yaw_angle + result->valid.push_back(true); // 6 velocity + + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.h new file mode 100755 index 0000000000..ed2ff151e2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/postprocessor.h @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Postprocessor object for Petr serials model. 
+ */ +class ULTRAINFER_DECL PetrPostprocessor { +public: + /** \brief Create a postprocessor instance for Petr serials model + */ + PetrPostprocessor(); + + /** \brief Process the result of runtime and fill to PerceptionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results); + +protected: + float conf_threshold_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.cc new file mode 100755 index 0000000000..277fbb9b14 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.cc @@ -0,0 +1,114 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/petr/preprocessor.h" + +#include + +#include "ultrainfer/function/concat.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +PetrPreprocessor::PetrPreprocessor(const std::string &config_file) { + config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create Paddle3DDetPreprocessor."); + initialized_ = true; +} + +bool PetrPreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + + processors_.push_back(std::make_shared(800, 450)); + processors_.push_back(std::make_shared(0, 130, 800, 320)); + + std::vector mean{103.530, 116.280, 123.675}; + std::vector std{57.375, 57.120, 58.395}; + bool scale = false; + processors_.push_back(std::make_shared(mean, std, scale)); + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + + // Fusion will improve performance + FuseTransforms(&processors_); + + return true; +} + +bool PetrPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + if (image_batch->mats->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." 
<< std::endl; + return false; + } + // There are 3 outputs, image, k_data, timestamp + outputs->resize(3); + int num_cams = static_cast(image_batch->mats->size()); + + // Allocate memory for k_data + (*outputs)[1].Resize({1, num_cams, 4, 4}, FDDataType::FP32); + + // Allocate memory for image_data + (*outputs)[0].Resize({1, num_cams, 3, 320, 800}, FDDataType::FP32); + + // Allocate memory for timestamp + (*outputs)[2].Resize({1, num_cams}, FDDataType::FP32); + + auto *image_ptr = reinterpret_cast((*outputs)[0].MutableData()); + + auto *k_data_ptr = reinterpret_cast((*outputs)[1].MutableData()); + + auto *timestamp_ptr = reinterpret_cast((*outputs)[2].MutableData()); + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(mat)) { + FDERROR << "Failed to processs image:" << i << " in " + << processors_[j]->Name() << "." << std::endl; + return false; + } + } + } + + for (int i = 0; i < num_cams / 2 * 4 * 4; ++i) { + input_k_data_.push_back(input_k_data_[i]); + } + memcpy(k_data_ptr, input_k_data_.data(), num_cams * 16 * sizeof(float)); + + std::vector timestamp(num_cams, 0.0f); + for (int i = num_cams / 2; i < num_cams; ++i) { + timestamp[i] = 1.0f; + } + memcpy(timestamp_ptr, timestamp.data(), num_cams * sizeof(float)); + + FDTensor *tensor = image_batch->Tensor(); // [num_cams,3,320,800] + tensor->ExpandDim(0); // [num_cams,3,320,800] -> [1,num_cams,3,320,800] + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.h new file mode 100755 index 0000000000..c5b925a409 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/petr/preprocessor.h @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Preprocessor object for Petr serials model. 
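The Apply() above hands the runtime three tensors per request: the stacked images with shape [1, num_cams, 3, 320, 800], one 4x4 matrix per camera in k_data with shape [1, num_cams, 4, 4], and a per-camera timestamp of shape [1, num_cams] that is 0 for the current-frame cameras and 1 for the repeated previous-frame cameras. A small standalone sketch of that duplication and timestamp fill, with plain std::vector standing in for FDTensor (values are placeholders):

```cpp
#include <cstdio>
#include <vector>

int main() {
  const int num_cams = 12;  // current-frame cameras plus their repeats
  // Assume 6 calibrated 4x4 matrices (16 floats each) are already loaded.
  std::vector<float> k_data(6 * 16, 0.5f);

  // Duplicate the first num_cams/2 matrices for the second half of the cameras.
  for (int i = 0; i < num_cams / 2 * 4 * 4; ++i) {
    k_data.push_back(k_data[i]);
  }

  // Timestamp: 0 for the first half, 1 for the repeated half.
  std::vector<float> timestamp(num_cams, 0.0f);
  for (int i = num_cams / 2; i < num_cams; ++i) {
    timestamp[i] = 1.0f;
  }

  std::printf("k_data floats: %zu, timestamps: %zu\n", k_data.size(),
              timestamp.size());  // expected: 192 and 12
  return 0;
}
```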
+ */ +class ULTRAINFER_DECL PetrPreprocessor : public ProcessorManager { +public: + PetrPreprocessor() = default; + /** \brief Create a preprocessor instance for Petr model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * smoke/infer_cfg.yml + */ + explicit PetrPreprocessor(const std::string &config_file); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Apply(FDMatBatch *image_batch, std::vector *outputs); + +protected: + bool BuildPreprocessPipelineFromConfig(); + std::vector> processors_; + + bool disable_permute_ = false; + + bool initialized_ = false; + + std::string config_file_; + + float scale_ = 1.0f; + std::vector mean_; + std::vector std_; + + std::vector input_k_data_{ + -1.40307297e-03, 9.07780395e-06, 4.84838307e-01, -5.43047376e-02, + -1.40780103e-04, 1.25770375e-05, 1.04126692e+00, 7.67668605e-01, + -1.02884378e-05, -1.41007011e-03, 1.02823459e-01, -3.07415128e-01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, + -9.39000631e-04, -7.65239349e-07, 1.14073277e+00, 4.46270645e-01, + 1.04998052e-03, 1.91798881e-05, 2.06218868e-01, 7.42717385e-01, + 1.48074005e-05, -1.40855671e-03, 7.45946690e-02, -3.16081315e-01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, + -7.0699735e-04, 4.2389297e-07, -5.5183989e-01, -5.3276348e-01, + -1.2281288e-03, 2.5626015e-05, 1.0212017e+00, 6.1102939e-01, + -2.2421273e-05, -1.4170362e-03, 9.3639769e-02, -3.0863306e-01, + 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00, + 2.2227580e-03, 2.5312484e-06, -9.7261822e-01, 9.0684637e-02, + 1.9360810e-04, 2.1347081e-05, -1.0779887e+00, -7.9227984e-01, + 4.3742721e-06, -2.2310747e-03, 1.0842450e-01, -2.9406491e-01, + 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00, + 5.97175560e-04, -5.88774265e-06, -1.15893924e+00, -4.49921310e-01, + -1.28312141e-03, 3.58297058e-07, 1.48300052e-01, 1.14334166e-01, + -2.80917516e-06, -1.41527120e-03, 8.37693438e-02, -2.36765608e-01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, + 3.6048229e-04, 3.8333174e-06, 7.9871160e-01, 4.3321830e-01, + 1.3671946e-03, 6.7484652e-06, -8.4722507e-01, 1.9411178e-01, + 7.5027779e-06, -1.4139183e-03, 8.2083985e-02, -2.4505949e-01, + 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00}; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.cc new file mode 100755 index 0000000000..3bf2abc13f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
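The hard-coded input_k_data_ above appears to hold six flattened 4x4 camera matrices (96 floats, one 16-float block per camera, each block ending in the homogeneous row 0, 0, 0, 1); the preprocessor later duplicates these blocks for the repeated cameras. For inspecting such a blob, a row-major indexing sketch (the helper and placeholder values are illustrative, not part of the library):

```cpp
#include <cstdio>
#include <vector>

// Read element (row, col) of the cam-th flattened 4x4 matrix.
inline float KAt(const std::vector<float> &k, int cam, int row, int col) {
  return k[cam * 16 + row * 4 + col];
}

int main() {
  std::vector<float> k(6 * 16, 0.0f);  // placeholder instead of the real values
  for (int cam = 0; cam < 6; ++cam) {
    k[cam * 16 + 15] = 1.0f;  // bottom-right element of each 4x4 block
  }
  std::printf("cam 0, (3,3) = %.1f\n", KAt(k, 0, 3, 3));
  return 0;
}
```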
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h"
+
+#include "ultrainfer/vision/utils/utils.h"
+
+namespace ultrainfer {
+namespace vision {
+namespace perception {
+
+SmokePostprocessor::SmokePostprocessor() {}
+
+bool SmokePostprocessor::Run(const std::vector<FDTensor> &tensors,
+                             std::vector<PerceptionResult> *results) {
+  results->resize(1);
+  (*results)[0].Clear();
+  (*results)[0].Reserve(tensors[0].shape[0]);
+  if (tensors[0].dtype != FDDataType::FP32) {
+    FDERROR << "Only support post process with float32 data." << std::endl;
+    return false;
+  }
+  const float *data = reinterpret_cast<const float *>(tensors[0].Data());
+  auto result = &(*results)[0];
+  for (int i = 0; i < tensors[0].shape[0] * tensors[0].shape[1]; i += 14) {
+    // item 1 : class
+    // item 2 : observation angle α
+    // item 3 ~ 6 : box2d x1, y1, x2, y2
+    // item 7 ~ 9 : box3d h, w, l
+    // item 10 ~ 12 : box3d bottom center x, y, z
+    // item 13 : box3d yaw angle
+    // item 14 : score
+    std::vector<float> vec(data + i, data + i + 14);
+    result->scores.push_back(vec[13]);
+    result->label_ids.push_back(vec[0]);
+    result->boxes.emplace_back(std::array<float, 7>{
+        vec[2], vec[3], vec[4], vec[5], vec[6], vec[7], vec[8]});
+    result->center.emplace_back(std::array<float, 3>{vec[9], vec[10], vec[11]});
+    result->observation_angle.push_back(vec[1]);
+    result->yaw_angle.push_back(vec[12]);
+  }
+
+  result->valid.push_back(true);   // 0 scores
+  result->valid.push_back(true);   // 1 label_ids
+  result->valid.push_back(true);   // 2 boxes
+  result->valid.push_back(true);   // 3 center
+  result->valid.push_back(true);   // 4 observation_angle
+  result->valid.push_back(true);   // 5 yaw_angle
+  result->valid.push_back(false);  // 6 velocity
+
+  return true;
+}
+
+} // namespace perception
+} // namespace vision
+} // namespace ultrainfer
diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h
new file mode 100755
index 0000000000..046513cd89
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "ultrainfer/vision/common/processors/transform.h"
+#include "ultrainfer/vision/common/result.h"
+
+namespace ultrainfer {
+namespace vision {
+
+namespace perception {
+/*! @brief Postprocessor object for Smoke serials model.
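Both 3D postprocessors fill PerceptionResult::valid to advertise which optional fields were produced: SMOKE emits no velocity, PETR emits no observation angle. A consumer should check the flag before reading the corresponding vector. A hedged sketch of that pattern with a stand-in struct (field order taken from the comments above; the type and helper names are illustrative, not UltraInfer APIs):

```cpp
#include <cstdio>
#include <vector>

// Stand-in mirroring the flag order used above:
// 0 scores, 1 label_ids, 2 boxes, 3 center, 4 observation_angle,
// 5 yaw_angle, 6 velocity.
struct ResultView {
  std::vector<bool> valid;
  std::vector<float> observation_angle;
};

void PrintObservationAngles(const ResultView &r) {
  if (r.valid.size() > 4 && r.valid[4]) {
    for (float a : r.observation_angle) std::printf("alpha=%.3f\n", a);
  } else {
    std::printf("observation_angle not produced by this model\n");
  }
}

int main() {
  ResultView smoke{{true, true, true, true, true, true, false}, {0.12f}};
  ResultView petr{{true, true, true, true, false, true, true}, {}};
  PrintObservationAngles(smoke);  // prints the angle
  PrintObservationAngles(petr);   // reports the field as absent
  return 0;
}
```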
+ */ +class ULTRAINFER_DECL SmokePostprocessor { +public: + /** \brief Create a postprocessor instance for Smoke serials model + */ + SmokePostprocessor(); + + /** \brief Process the result of runtime and fill to PerceptionResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector &tensors, + std::vector *results); + +protected: + float conf_threshold_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.cc new file mode 100755 index 0000000000..22d5a4de9f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.cc @@ -0,0 +1,161 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +SmokePreprocessor::SmokePreprocessor(const std::string &config_file) { + config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create Paddle3DDetPreprocessor."); + initialized_ = true; +} + +bool SmokePreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." 
<< std::endl; + return false; + } + + // read for preprocess + processors_.push_back(std::make_shared()); + + bool has_permute = false; + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "NormalizeImage") { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = true; + if (op["is_scale"]) { + is_scale = op["is_scale"].as(); + } + std::string norm_type = "mean_std"; + if (op["norm_type"]) { + norm_type = op["norm_type"].as(); + } + if (norm_type != "mean_std") { + std::fill(mean.begin(), mean.end(), 0.0); + std::fill(std.begin(), std.end(), 1.0); + } + processors_.push_back(std::make_shared(mean, std, is_scale)); + } else if (op_name == "Resize") { + bool keep_ratio = op["keep_ratio"].as(); + auto target_size = op["target_size"].as>(); + int interp = op["interp"].as(); + FDASSERT(target_size.size() == 2, + "Require size of target_size be 2, but now it's %lu.", + target_size.size()); + if (!keep_ratio) { + int width = target_size[1]; + int height = target_size[0]; + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, interp, false)); + } else { + int min_target_size = std::min(target_size[0], target_size[1]); + int max_target_size = std::max(target_size[0], target_size[1]); + std::vector max_size; + if (max_target_size > 0) { + max_size.push_back(max_target_size); + max_size.push_back(max_target_size); + } + processors_.push_back(std::make_shared( + min_target_size, interp, true, max_size)); + } + } else if (op_name == "Permute") { + // Do nothing, do permute as the last operation + has_permute = true; + continue; + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + if (!disable_permute_) { + if (has_permute) { + // permute = cast + HWC2CHW + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + } + } + + // Fusion will improve performance + FuseTransforms(&processors_); + + input_k_data_ = cfg["k_data"].as>(); + input_ratio_data_ = cfg["ratio_data"].as>(); + return true; +} + +bool SmokePreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + if (image_batch->mats->empty()) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." << std::endl; + return false; + } + // There are 3 outputs, image, k_data, ratio_data + outputs->resize(3); + int batch = static_cast(image_batch->mats->size()); + + // Allocate memory for k_data + (*outputs)[2].Resize({batch, 3, 3}, FDDataType::FP32); + + // Allocate memory for ratio_data + (*outputs)[0].Resize({batch, 2}, FDDataType::FP32); + + auto *k_data_ptr = reinterpret_cast((*outputs)[2].MutableData()); + + auto *ratio_data_ptr = reinterpret_cast((*outputs)[0].MutableData()); + + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat *mat = &(image_batch->mats->at(i)); + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(mat)) { + FDERROR << "Failed to processs image:" << i << " in " + << processors_[j]->Name() << "." 
<< std::endl; + return false; + } + } + + memcpy(k_data_ptr + i * 9, input_k_data_.data(), 9 * sizeof(float)); + memcpy(ratio_data_ptr + i * 2, input_ratio_data_.data(), 2 * sizeof(float)); + } + + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[1].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h new file mode 100755 index 0000000000..e4409a5ea1 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h @@ -0,0 +1,62 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +namespace perception { +/*! @brief Preprocessor object for Smoke serials model. + */ +class ULTRAINFER_DECL SmokePreprocessor : public ProcessorManager { +public: + SmokePreprocessor() = default; + /** \brief Create a preprocessor instance for Smoke model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * smoke/infer_cfg.yml + */ + explicit SmokePreprocessor(const std::string &config_file); + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned + * by cv::imread() \param[in] outputs The output tensors which will feed in + * runtime \param[in] ims_info The shape info list, record input_shape and + * output_shape \return true if the preprocess successed, otherwise false + */ + bool Apply(FDMatBatch *image_batch, std::vector *outputs); + +protected: + bool BuildPreprocessPipelineFromConfig(); + std::vector> processors_; + + bool disable_permute_ = false; + + bool initialized_ = false; + + std::string config_file_; + + std::vector input_k_data_; + + std::vector input_ratio_data_; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.cc new file mode 100755 index 0000000000..47a93fc161 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
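SmokePreprocessor reads two extra arrays from infer_cfg.yml, k_data (9 floats, the flattened 3x3 camera intrinsic matrix) and ratio_data (2 floats), and copies them per image into the second and third runtime inputs above. A small sketch of flattening an intrinsic matrix into that 9-float layout; the row-major ordering and the numeric values are assumptions for illustration, not taken from a real config:

```cpp
#include <array>
#include <cstdio>
#include <vector>

// Flatten a 3x3 intrinsic matrix row by row into a 9-float blob.
std::vector<float> FlattenK(const std::array<std::array<float, 3>, 3> &K) {
  std::vector<float> flat;
  flat.reserve(9);
  for (const auto &row : K) {
    for (float v : row) flat.push_back(v);
  }
  return flat;
}

int main() {
  // Illustrative intrinsics: focal lengths and principal point.
  std::array<std::array<float, 3>, 3> K = {{{721.5f, 0.f, 609.6f},
                                            {0.f, 721.5f, 172.9f},
                                            {0.f, 0.f, 1.f}}};
  auto flat = FlattenK(K);
  std::printf("k_data has %zu floats, k_data[0]=%.1f\n", flat.size(), flat[0]);
  return 0;
}
```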
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/perception/paddle3d/smoke/smoke.h" + +namespace ultrainfer { +namespace vision { +namespace perception { + +Smoke::Smoke(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +bool Smoke::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + return true; +} + +bool Smoke::Predict(const cv::Mat &im, PerceptionResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + if (results.size()) { + *result = std::move(results[0]); + } + return true; +} + +bool Smoke::BatchPredict(const std::vector &images, + std::vector *results) { + std::vector fd_images = WrapMat(images); + + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[1].name = InputInfoOfRuntime(1).name; + reused_input_tensors_[2].name = InputInfoOfRuntime(2).name; + + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.h b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.h new file mode 100755 index 0000000000..ee496888b0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/perception/paddle3d/smoke/postprocessor.h" +#include "ultrainfer/vision/perception/paddle3d/smoke/preprocessor.h" + +namespace ultrainfer { +namespace vision { +namespace perception { +/*! @brief smoke model object used when to load a smoke model exported by smoke. + */ +class ULTRAINFER_DECL Smoke : public UltraInferModel { +public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g smoke/model.pdiparams + * \param[in] params_file Path of parameter file, e.g smoke/model.pdiparams, + * if the model format is ONNX, this parameter will be ignored \param[in] + * custom_option RuntimeOption for inference, the default will use cpu, and + * choose the backend defined in "valid_cpu_backends" \param[in] model_format + * Model format of the loaded model, default is Paddle format + */ + Smoke(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "Paddle3D/smoke"; } + + /** \brief Predict the perception result for an input image + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D + * array with layout HWC, BGR format \param[in] result The output perception + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(const cv::Mat &img, PerceptionResult *result); + + /** \brief Predict the perception results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output perception result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of Smoke + virtual SmokePreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of Smoke + virtual SmokePostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + SmokePreprocessor preprocessor_; + SmokePostprocessor postprocessor_; + bool initialized_ = false; +}; + +} // namespace perception +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke_pybind.cc b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke_pybind.cc new file mode 100755 index 0000000000..f16b5b064e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/paddle3d/smoke/smoke_pybind.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
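For reference, a minimal C++ driver for the Smoke class declared above. The model paths are placeholders, and the aggregate header name plus the Initialized() accessor are assumed to mirror the usual UltraInfer model base class; treat this as a sketch, not a tested example:

```cpp
#include <iostream>

#include <opencv2/opencv.hpp>

#include "ultrainfer/vision.h"  // assumed aggregate header

int main() {
  namespace perception = ultrainfer::vision::perception;
  // Placeholder paths; point these at your own exported SMOKE model.
  perception::Smoke model("smoke/model.pdmodel", "smoke/model.pdiparams",
                          "smoke/infer_cfg.yml");
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize the SMOKE model." << std::endl;
    return -1;
  }
  cv::Mat im = cv::imread("test.png");
  ultrainfer::vision::PerceptionResult result;
  if (!model.Predict(im, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << "detections: " << result.scores.size() << std::endl;
  return 0;
}
```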
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindSmoke(pybind11::module &m) { + pybind11::class_(m, "SmokePreprocessor") + .def(pybind11::init()) + .def("run", [](vision::perception::SmokePreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in SmokePreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }); + + pybind11::class_(m, + "SmokePostprocessor") + .def(pybind11::init<>()) + .def("run", + [](vision::perception::SmokePostprocessor &self, + std::vector &inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "SmokePostprocessor."); + } + return results; + }) + .def("run", [](vision::perception::SmokePostprocessor &self, + std::vector &input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "SmokePostprocessor."); + } + return results; + }); + + pybind11::class_(m, "Smoke") + .def(pybind11::init()) + .def("predict", + [](vision::perception::Smoke &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::PerceptionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", + [](vision::perception::Smoke &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", + &vision::perception::Smoke::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::perception::Smoke::GetPostprocessor); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/perception/perception_pybind.cc b/libs/ultrainfer/ultrainfer/vision/perception/perception_pybind.cc new file mode 100755 index 0000000000..ba68dbb4d6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/perception/perception_pybind.cc @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindSmoke(pybind11::module &m); +void BindPetr(pybind11::module &m); +void BindCenterpoint(pybind11::module &m); +void BindCaddn(pybind11::module &m); + +void BindPerception(pybind11::module &m) { + auto perception_module = + m.def_submodule("perception", "3D object perception models."); + BindSmoke(perception_module); + BindPetr(perception_module); + BindCenterpoint(perception_module); + BindCaddn(perception_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.cc b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.cc new file mode 100755 index 0000000000..fb9eea1299 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.cc @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/segmentation/ppseg/model.h" + +#include "ultrainfer/utils/unique_ptr.h" + +namespace ultrainfer { +namespace vision { +namespace segmentation { + +PaddleSegModel::PaddleSegModel(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) + : preprocessor_(config_file), postprocessor_(config_file) { + if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } else { + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + valid_rknpu_backends = {Backend::RKNPU2}; + valid_timvx_backends = {Backend::LITE}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_directml_backends = {Backend::ORT}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +std::unique_ptr PaddleSegModel::Clone() const { + std::unique_ptr clone_model = + ultrainfer::utils::make_unique(PaddleSegModel(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool PaddleSegModel::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." 
<< std::endl; + return false; + } + return true; +} + +bool PaddleSegModel::Predict(cv::Mat *im, SegmentationResult *result) { + return Predict(*im, result); +} + +bool PaddleSegModel::Predict(const cv::Mat &im, SegmentationResult *result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool PaddleSegModel::BatchPredict(const std::vector &imgs, + std::vector *results) { + std::vector fd_images = WrapMat(imgs); + // Record the shape of input images + std::map>> imgs_info; + preprocessor_.SetImgsInfo(&imgs_info); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + if (!postprocessor_.Run(reused_output_tensors_, results, imgs_info)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.h b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.h new file mode 100755 index 0000000000..41d02c1da7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/model.h @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/segmentation/ppseg/postprocessor.h" +#include "ultrainfer/vision/segmentation/ppseg/preprocessor.h" + +namespace ultrainfer { +namespace vision { +/** \brief All segmentation model APIs are defined inside this namespace + * + */ +namespace segmentation { + +/*! 
@brief PaddleSeg serials model object used when to load a PaddleSeg model + * exported by PaddleSeg repository + */ +class ULTRAINFER_DECL PaddleSegModel : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g unet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g unet/model.pdiparams, if + * the model format is ONNX, this parameter will be ignored \param[in] + * config_file Path of configuration file for deployment, e.g unet/deploy.yml + * \param[in] custom_option RuntimeOption for inference, the default will use + * cpu, and choose the backend defined in `valid_cpu_backends` \param[in] + * model_format Model format of the loaded model, default is Paddle format + */ + PaddleSegModel(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /** \brief Clone a new PaddleSegModel with less memory usage when multiple + * instances of the same model are created + * + * \return new PaddleDetModel* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "PaddleSeg"; } + + /** \brief DEPRECATED Predict the segmentation result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output segmentation + * result will be writen to this structure \return true if the segmentation + * prediction successed, otherwise false + */ + virtual bool Predict(cv::Mat *im, SegmentationResult *result); + + /** \brief Predict the segmentation result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result The output segmentation + * result will be writen to this structure \return true if the segmentation + * prediction successed, otherwise false + */ + virtual bool Predict(const cv::Mat &im, SegmentationResult *result); + + /** \brief Predict the segmentation results for a batch of input images + * + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output segmentation result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector &imgs, + std::vector *results); + + /// Get preprocessor reference of PaddleSegModel + virtual PaddleSegPreprocessor &GetPreprocessor() { return preprocessor_; } + + /// Get postprocessor reference of PaddleSegModel + virtual PaddleSegPostprocessor &GetPostprocessor() { return postprocessor_; } + +protected: + bool Initialize(); + PaddleSegPreprocessor preprocessor_; + PaddleSegPostprocessor postprocessor_; +}; + +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.cc b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.cc new file mode 100755 index 0000000000..62d6c83e10 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.cc @@ -0,0 +1,291 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
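A corresponding minimal driver for the PaddleSegModel API declared above, again with placeholder paths and the assumption that the aggregate ultrainfer/vision.h header and Initialized() accessor are available; the store_score_map toggle is the one exposed by the postprocessor in this patch:

```cpp
#include <iostream>

#include <opencv2/opencv.hpp>

#include "ultrainfer/vision.h"  // assumed aggregate header

int main() {
  namespace seg = ultrainfer::vision::segmentation;
  // Placeholder paths to a PaddleSeg export (model, params, deploy.yaml).
  seg::PaddleSegModel model("pp_liteseg/model.pdmodel",
                            "pp_liteseg/model.pdiparams",
                            "pp_liteseg/deploy.yaml");
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize the segmentation model." << std::endl;
    return -1;
  }

  // Optional: keep per-pixel scores next to the label map.
  model.GetPostprocessor().SetStoreScoreMap(true);

  cv::Mat im = cv::imread("test.jpg");
  ultrainfer::vision::SegmentationResult result;
  if (!model.Predict(im, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << "label_map size: " << result.label_map.size() << std::endl;
  return 0;
}
```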
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/segmentation/ppseg/postprocessor.h" +#include "ultrainfer/function/cast.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace segmentation { + +PaddleSegPostprocessor::PaddleSegPostprocessor(const std::string &config_file) { + FDASSERT(ReadFromConfig(config_file), + "Failed to create PaddleSegPreprocessor."); + initialized_ = true; +} + +bool PaddleSegPostprocessor::ReadFromConfig(const std::string &config_file) { + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file + << ", maybe you should check this file." << std::endl; + return false; + } + + if (cfg["Deploy"]["output_op"]) { + std::string output_op = cfg["Deploy"]["output_op"].as(); + if (output_op == "softmax") { + is_with_softmax_ = true; + is_with_argmax_ = false; + } else if (output_op == "argmax") { + is_with_softmax_ = false; + is_with_argmax_ = true; + } else if (output_op == "none") { + is_with_softmax_ = false; + is_with_argmax_ = false; + } else { + FDERROR << "Unexcepted output_op operator in deploy.yml: " << output_op + << "." << std::endl; + return false; + } + } + return true; +} + +bool PaddleSegPostprocessor::SliceOneResultFromBatchInferResults( + const FDTensor &infer_results, FDTensor *infer_result, + const std::vector &infer_result_shape, const int64_t &start_idx) { + int64_t infer_batch = infer_results.shape[0]; + if (infer_batch == 1) { + *infer_result = infer_results; + // batch is 1, so ignore + infer_result->shape = infer_result_shape; + } else { + if (infer_results.dtype == FDDataType::FP32) { + const float_t *infer_results_ptr = + reinterpret_cast(infer_results.CpuData()) + + start_idx; + infer_result->SetExternalData( + infer_result_shape, FDDataType::FP32, + reinterpret_cast(const_cast(infer_results_ptr))); + } else if (infer_results.dtype == FDDataType::INT64) { + const int64_t *infer_results_ptr = + reinterpret_cast(infer_results.CpuData()) + + start_idx; + infer_result->SetExternalData( + infer_result_shape, FDDataType::INT64, + reinterpret_cast(const_cast(infer_results_ptr))); + } else if (infer_results.dtype == FDDataType::INT32) { + const int32_t *infer_results_ptr = + reinterpret_cast(infer_results.CpuData()) + + start_idx; + infer_result->SetExternalData( + infer_result_shape, FDDataType::INT32, + reinterpret_cast(const_cast(infer_results_ptr))); + } else if (infer_results.dtype == FDDataType::UINT8) { + const uint8_t *infer_results_ptr = + reinterpret_cast(infer_results.CpuData()) + + start_idx; + infer_result->SetExternalData( + infer_result_shape, FDDataType::UINT8, + reinterpret_cast(const_cast(infer_results_ptr))); + } else { + FDASSERT( + false, + "Require the data type for slicing is int64, fp32 or int32, but now " + "it's %s.", + Str(infer_results.dtype).c_str()) + return false; + } + } + return true; +} + +bool PaddleSegPostprocessor::ProcessWithScoreResult( + const FDTensor &infer_result, const int64_t &out_num, + SegmentationResult *result) { + const uint8_t *argmax_infer_result_buffer = nullptr; + const float_t 
*score_infer_result_buffer = nullptr; + FDTensor argmax_infer_result; + FDTensor max_score_result; + std::vector reduce_dim{-1}; + function::ArgMax(infer_result, &argmax_infer_result, -1, FDDataType::UINT8); + function::Max(infer_result, &max_score_result, reduce_dim); + score_infer_result_buffer = + reinterpret_cast(max_score_result.CpuData()); + std::memcpy(result->score_map.data(), score_infer_result_buffer, + out_num * sizeof(float_t)); + + argmax_infer_result_buffer = + reinterpret_cast(argmax_infer_result.CpuData()); + + std::memcpy(result->label_map.data(), argmax_infer_result_buffer, + out_num * sizeof(uint8_t)); + + return true; +} + +bool PaddleSegPostprocessor::ProcessWithLabelResult( + const FDTensor &infer_result, const int64_t &out_num, + SegmentationResult *result) { + if (infer_result.dtype == FDDataType::INT64) { + const int64_t *infer_result_buffer = + reinterpret_cast(infer_result.CpuData()); + for (int i = 0; i < out_num; i++) { + result->label_map[i] = static_cast(*(infer_result_buffer + i)); + } + } else if (infer_result.dtype == FDDataType::INT32) { + const int32_t *infer_result_buffer = + reinterpret_cast(infer_result.CpuData()); + for (int i = 0; i < out_num; i++) { + result->label_map[i] = static_cast(*(infer_result_buffer + i)); + } + } else if (infer_result.dtype == FDDataType::UINT8) { + const uint8_t *infer_result_buffer = + reinterpret_cast(infer_result.CpuData()); + memcpy(result->label_map.data(), infer_result_buffer, + out_num * sizeof(uint8_t)); + } else { + FDASSERT( + false, + "Require the data type to process is int64, int32 or uint8, but now " + "it's %s.", + Str(infer_result.dtype).c_str()); + return false; + } + return true; +} + +bool PaddleSegPostprocessor::Run( + const std::vector &infer_results, + std::vector *results, + const std::map>> &imgs_info) { + // PaddleSeg has three types of inference output: + // 1. output with argmax and without softmax. 3-D matrix N(C)HW, Channel + // is batch_size, the element in matrix is classified label_id INT64 type. + // 2. output without argmax and without softmax. 4-D matrix NCHW, N(batch) + // is batch_size, Channel is the num of classes. The element is the logits + // of classes FP32 type + // 3. output without argmax and with softmax. 4-D matrix NCHW, the result + // of 2 with softmax layer + // Xdeploy output: + // 1. label_map + // 2. score_map(optional) + // 3. shape: 2-D HW + if (!initialized_) { + FDERROR << "Postprocessor is not initialized." 
<< std::endl; + return false; + } + + FDDataType infer_results_dtype = infer_results[0].dtype; + FDASSERT(infer_results_dtype == FDDataType::INT64 || + infer_results_dtype == FDDataType::FP32 || + infer_results_dtype == FDDataType::INT32, + "Require the data type of output is int64, fp32 or int32, but now " + "it's %s.", + Str(infer_results_dtype).c_str()); + + auto iter_input_imgs_shape_list = imgs_info.find("shape_info"); + FDASSERT(iter_input_imgs_shape_list != imgs_info.end(), + "Cannot find shape_info from imgs_info."); + + // For Argmax Softmax function to store transformed result below + FDTensor transform_infer_results; + + int64_t infer_batch = infer_results[0].shape[0]; + int64_t infer_channel = 0; + int64_t infer_height = 0; + int64_t infer_width = 0; + + if (is_with_argmax_) { + // infer_results with argmax + infer_channel = 1; + infer_height = infer_results[0].shape[1]; + infer_width = infer_results[0].shape[2]; + } else { + // infer_results without argmax + infer_channel = 1; + infer_height = infer_results[0].shape[2]; + infer_width = infer_results[0].shape[3]; + if (store_score_map_) { + infer_channel = infer_results[0].shape[1]; + std::vector dim{0, 2, 3, 1}; + function::Transpose(infer_results[0], &transform_infer_results, dim); + if (!is_with_softmax_ && apply_softmax_) { + function::Softmax(transform_infer_results, &transform_infer_results, 1); + } + } else { + function::ArgMax(infer_results[0], &transform_infer_results, 1, + FDDataType::UINT8); + infer_results_dtype = transform_infer_results.dtype; + } + } + + int64_t infer_chw = infer_channel * infer_height * infer_width; + + results->resize(infer_batch); + for (int i = 0; i < infer_batch; i++) { + SegmentationResult *result = &((*results)[i]); + result->Clear(); + int64_t start_idx = i * infer_chw; + + FDTensor infer_result; + std::vector infer_result_shape = {infer_height, infer_width, + infer_channel}; + + if (is_with_argmax_) { + SliceOneResultFromBatchInferResults(infer_results[0], &infer_result, + infer_result_shape, start_idx); + } else { + SliceOneResultFromBatchInferResults(transform_infer_results, + &infer_result, infer_result_shape, + start_idx); + } + bool is_resized = false; + int input_height = iter_input_imgs_shape_list->second[i][0]; + int input_width = iter_input_imgs_shape_list->second[i][1]; + if (input_height != infer_height || input_width != infer_width) { + is_resized = true; + } + + FDMat mat; + // Resize interpration + int interpolation = cv::INTER_LINEAR; + if (is_resized) { + if (infer_results_dtype == FDDataType::INT64 || + infer_results_dtype == FDDataType::INT32) { + function::Cast(infer_result, &infer_result, FDDataType::UINT8); + // label map resize with nearest interpolation + interpolation = cv::INTER_NEAREST; + } + mat = std::move(Mat::Create(infer_result, ProcLib::OPENCV)); + Resize::Run(&mat, input_width, input_height, -1.0f, -1.0f, interpolation, + false, ProcLib::OPENCV); + mat.ShareWithTensor(&infer_result); + } + result->shape = infer_result.shape; + // output shape is 2-D HW layout, so out_num = H * W + int out_num = + std::accumulate(result->shape.begin(), result->shape.begin() + 2, 1, + std::multiplies()); + + if (!is_with_argmax_ && store_score_map_) { + // output with label_map and score_map + result->contain_score_map = true; + result->Resize(out_num); + ProcessWithScoreResult(infer_result, out_num, result); + } else { + result->Resize(out_num); + ProcessWithLabelResult(infer_result, out_num, result); + } + // HWC remove C + result->shape.erase(result->shape.begin() + 2); + 
} + return true; +} +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.h b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.h new file mode 100755 index 0000000000..97b1b93606 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/postprocessor.h @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace segmentation { +/*! @brief Postprocessor object for PaddleSeg serials model. + */ +class ULTRAINFER_DECL PaddleSegPostprocessor { +public: + /** \brief Create a postprocessor instance for PaddleSeg serials model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * ppliteseg/deploy.yaml + */ + explicit PaddleSegPostprocessor(const std::string &config_file); + + /** \brief Process the result of runtime and fill to SegmentationResult + * structure + * + * \param[in] tensors The inference result from runtime + * \param[in] result The output result of detection + * \param[in] imgs_info The original input images shape info map, key is + * "shape_info", value is vector> a{{height, width}} \return + * true if the postprocess successed, otherwise false + */ + virtual bool + Run(const std::vector &infer_results, + std::vector *results, + const std::map>> &imgs_info); + + /** \brief Get apply_softmax property of PaddleSeg model, default is false + */ + bool GetApplySoftmax() const { return apply_softmax_; } + + /// Set apply_softmax value, bool type required + void SetApplySoftmax(bool value) { apply_softmax_ = value; } + + /// Get store_score_map property of PaddleSeg model, default is false + bool GetStoreScoreMap() const { return store_score_map_; } + + /// Set store_score_map value, bool type required + void SetStoreScoreMap(bool value) { store_score_map_ = value; } + +private: + virtual bool ReadFromConfig(const std::string &config_file); + + virtual bool SliceOneResultFromBatchInferResults( + const FDTensor &infer_results, FDTensor *infer_result, + const std::vector &infer_result_shape, const int64_t &start_idx); + + virtual bool ProcessWithScoreResult(const FDTensor &infer_result, + const int64_t &out_num, + SegmentationResult *result); + + virtual bool ProcessWithLabelResult(const FDTensor &infer_result, + const int64_t &out_num, + SegmentationResult *result); + + bool is_with_softmax_ = false; + + bool is_with_argmax_ = true; + + bool apply_softmax_ = false; + + bool store_score_map_ = false; + + bool initialized_ = false; +}; + +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git 
a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/ppseg_pybind.cc b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/ppseg_pybind.cc new file mode 100755 index 0000000000..67ecfa6bee --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/ppseg_pybind.cc @@ -0,0 +1,130 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPSeg(pybind11::module &m) { + pybind11::class_(m, "PaddleSegPreprocessor") + .def(pybind11::init()) + .def("run", + [](vision::segmentation::PaddleSegPreprocessor &self, + std::vector &im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + // Record the shape of input images + std::map>> imgs_info; + std::vector outputs; + self.SetImgsInfo(&imgs_info); + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "PaddleSegPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return make_pair(outputs, imgs_info); + ; + }) + .def("disable_normalize", + [](vision::segmentation::PaddleSegPreprocessor &self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::segmentation::PaddleSegPreprocessor &self) { + self.DisablePermute(); + }) + .def_property( + "is_vertical_screen", + &vision::segmentation::PaddleSegPreprocessor::GetIsVerticalScreen, + &vision::segmentation::PaddleSegPreprocessor::SetIsVerticalScreen); + + pybind11::class_( + m, "PaddleSegModel") + .def(pybind11::init()) + .def("clone", + [](vision::segmentation::PaddleSegModel &self) { + return self.Clone(); + }) + .def("predict", + [](vision::segmentation::PaddleSegModel &self, + pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::SegmentationResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](vision::segmentation::PaddleSegModel &self, + std::vector &data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly( + "preprocessor", + &vision::segmentation::PaddleSegModel::GetPreprocessor) + .def_property_readonly( + "postprocessor", + &vision::segmentation::PaddleSegModel::GetPostprocessor); + + pybind11::class_( + m, "PaddleSegPostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::segmentation::PaddleSegPostprocessor &self, + std::vector &inputs, + const std::map>> + &imgs_info) { + std::vector results; + if (!self.Run(inputs, &results, imgs_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleSegPostprocessor."); + } + return results; + }) + .def("run", + [](vision::segmentation::PaddleSegPostprocessor &self, + std::vector &input_array, + const std::map>> + 
&imgs_info) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results, imgs_info)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "PaddleSegPostprocessor."); + } + return results; + }) + .def_property( + "apply_softmax", + &vision::segmentation::PaddleSegPostprocessor::GetApplySoftmax, + &vision::segmentation::PaddleSegPostprocessor::SetApplySoftmax) + .def_property( + "store_score_map", + &vision::segmentation::PaddleSegPostprocessor::GetStoreScoreMap, + &vision::segmentation::PaddleSegPostprocessor::SetStoreScoreMap); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.cc b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.cc new file mode 100755 index 0000000000..4ed2c15b65 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.cc @@ -0,0 +1,180 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/vision/segmentation/ppseg/preprocessor.h" + +#include "ultrainfer/function/concat.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace segmentation { + +PaddleSegPreprocessor::PaddleSegPreprocessor(const std::string &config_file) { + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), + "Failed to create PaddleSegPreprocessor."); + initialized_ = true; +} + +bool PaddleSegPreprocessor::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + processors_.push_back(std::make_shared()); + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + + if (cfg["Deploy"]["transforms"]) { + auto preprocess_cfg = cfg["Deploy"]["transforms"]; + for (const auto &op : preprocess_cfg) { + FDASSERT(op.IsMap(), + "Require the transform information in yaml be Map type."); + if (op["type"].as() == "Normalize") { + if (!disable_normalize_) { + std::vector mean = {0.5, 0.5, 0.5}; + std::vector std = {0.5, 0.5, 0.5}; + if (op["mean"]) { + mean = op["mean"].as>(); + } + if (op["std"]) { + std = op["std"].as>(); + } + processors_.push_back(std::make_shared(mean, std)); + } + } else if (op["type"].as() == "Resize") { + is_contain_resize_op_ = true; + const auto &target_size = op["target_size"]; + int resize_width = target_size[0].as(); + int resize_height = target_size[1].as(); + processors_.push_back( + std::make_shared(resize_width, resize_height)); + } else { + std::string op_name = op["type"].as(); + FDERROR << "Unexcepted preprocess operator: " << op_name << "." 
+ << std::endl; + return false; + } + } + } + if (cfg["Deploy"]["input_shape"]) { + auto input_shape = cfg["Deploy"]["input_shape"]; + int input_height = input_shape[2].as(); + int input_width = input_shape[3].as(); + if (input_height != -1 && input_width != -1 && !is_contain_resize_op_) { + is_contain_resize_op_ = true; + processors_.insert(processors_.begin(), + std::make_shared(input_width, input_height)); + } + } + if (!disable_permute_) { + processors_.push_back(std::make_shared()); + } + + // Fusion will improve performance + FuseTransforms(&processors_); + return true; +} + +bool PaddleSegPreprocessor::Apply(FDMatBatch *image_batch, + std::vector *outputs) { + std::vector *images = image_batch->mats; + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." << std::endl; + return false; + } + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." + << std::endl; + return false; + } + std::vector> shape_info; + for (const auto &image : *images) { + shape_info.push_back( + {static_cast(image.Height()), static_cast(image.Width())}); + } + (*imgs_info_)["shape_info"] = shape_info; + for (size_t i = 0; i < processors_.size(); ++i) { + if (processors_[i]->Name() == "Resize") { + auto processor = dynamic_cast(processors_[i].get()); + int resize_width = -1; + int resize_height = -1; + std::tie(resize_width, resize_height) = processor->GetWidthAndHeight(); + if (is_vertical_screen_ && (resize_width > resize_height)) { + if (!(processor->SetWidthAndHeight(resize_height, resize_width))) { + FDERROR << "Failed to set width and height of " + << processors_[i]->Name() << " processor." << std::endl; + } + } + break; + } + } + size_t img_num = images->size(); + // Batch preprocess : resize all images to the largest image shape in batch + if (!is_contain_resize_op_ && img_num > 1) { + int max_width = 0; + int max_height = 0; + for (size_t i = 0; i < img_num; ++i) { + max_width = std::max(max_width, ((*images)[i]).Width()); + max_height = std::max(max_height, ((*images)[i]).Height()); + } + pre_resize_op_->SetWidthAndHeight(max_width, max_height); + for (size_t i = 0; i < img_num; ++i) { + if (!(*pre_resize_op_)(&(*images)[i])) { + FDERROR << "Failed to batch resize max_width and max_height" + << std::endl; + } + } + } + for (size_t i = 0; i < img_num; ++i) { + for (size_t j = 0; j < processors_.size(); ++j) { + if (!(*(processors_[j].get()))(&((*images)[i]))) { + FDERROR << "Failed to process image data in " << processors_[i]->Name() + << "." << std::endl; + return false; + } + } + } + outputs->resize(1); + FDTensor *tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return true; +} + +void PaddleSegPreprocessor::DisableNormalize() { + this->disable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + } +} +void PaddleSegPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is + // loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." 
+ << std::endl; + } +} +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.h b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.h new file mode 100755 index 0000000000..3810476753 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/ppseg/preprocessor.h @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "ultrainfer/vision/common/processors/manager.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { +namespace segmentation { +/*! @brief Preprocessor object for PaddleSeg serials model. + */ +class ULTRAINFER_DECL PaddleSegPreprocessor : public ProcessorManager { +public: + /** \brief Create a preprocessor instance for PaddleSeg serials model + * + * \param[in] config_file Path of configuration file for deployment, e.g + * ppliteseg/deploy.yaml + */ + explicit PaddleSegPreprocessor(const std::string &config_file); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch *image_batch, std::vector *outputs); + + /// Get is_vertical_screen property of PP-HumanSeg model, default is false + bool GetIsVerticalScreen() const { return is_vertical_screen_; } + + /// Set is_vertical_screen value, bool type required + void SetIsVerticalScreen(bool value) { is_vertical_screen_ = value; } + + /// This function will disable normalize in preprocessing step. + void DisableNormalize(); + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute(); + /// This function will set imgs_info_ in PaddleSegPreprocessor + void SetImgsInfo( + std::map>> *imgs_info) { + imgs_info_ = imgs_info; + } + /// This function will get imgs_info_ in PaddleSegPreprocessor + std::map>> *GetImgsInfo() { + return imgs_info_; + } + +private: + virtual bool BuildPreprocessPipelineFromConfig(); + std::vector> processors_; + std::string config_file_; + + /** \brief For PP-HumanSeg model, set true if the input image is vertical + * image(height > width), default value is false + */ + bool is_vertical_screen_ = false; + + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + + bool is_contain_resize_op_ = false; + + bool initialized_ = false; + + std::map>> *imgs_info_; + std::shared_ptr pre_resize_op_ = std::make_shared(0, 0); +}; + +} // namespace segmentation +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/segmentation/segmentation_pybind.cc b/libs/ultrainfer/ultrainfer/vision/segmentation/segmentation_pybind.cc new file mode 100755 index 0000000000..2e5706020f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/segmentation/segmentation_pybind.cc @@ -0,0 +1,26 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPSeg(pybind11::module &m); + +void BindSegmentation(pybind11::module &m) { + auto segmentation_module = + m.def_submodule("segmentation", "Image semantic segmentation models."); + BindPPSeg(segmentation_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.cc b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.cc new file mode 100755 index 0000000000..ae1bc25554 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
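// A minimal sketch of how the segmentation classes bound in ppseg_pybind.cc
// above are driven from C++. The include path, the demo file names, the
// Initialized() check and the constructor argument order are assumptions
// inferred from the other Paddle wrappers in this patch, not values fixed by
// these files.
#include <opencv2/opencv.hpp>

#include "ultrainfer/vision.h"

static void RunPaddleSegDemo() {
  namespace seg = ultrainfer::vision::segmentation;
  seg::PaddleSegModel model("ppliteseg/model.pdmodel",
                            "ppliteseg/model.pdiparams",
                            "ppliteseg/deploy.yaml");
  if (!model.Initialized()) {
    return;
  }
  // Optional switches that the pybind layer above also exposes.
  model.GetPreprocessor().SetIsVerticalScreen(false);
  model.GetPostprocessor().SetApplySoftmax(true);

  cv::Mat im = cv::imread("demo.jpg");
  ultrainfer::vision::SegmentationResult res;
  if (model.Predict(&im, &res)) {
    // res.label_map is expected to hold one class id per input pixel.
  }
}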
+ +#include "ultrainfer/vision/sr/ppsr/basicvsr.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +BasicVSR::BasicVSR(const std::string &model_file, + const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + // unsupported ORT backend + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::PDINFER, Backend::TRT, Backend::ORT}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.h b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.h new file mode 100755 index 0000000000..2c6a35390a --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/basicvsr.h @@ -0,0 +1,43 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/sr/ppsr/ppmsvsr.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +class ULTRAINFER_DECL BasicVSR : public PPMSVSR { +public: + /** + * Set path of model file and configuration file, and the configuration of + * runtime + * @param[in] model_file Path of model file, e.g BasicVSR/model.pdmodel + * @param[in] params_file Path of parameter file, e.g BasicVSR/model.pdiparams + * @param[in] custom_option RuntimeOption for inference, the default will use + * cpu, and choose the backend defined in `valid_cpu_backends` + * @param[in] model_format Model format of the loaded model, default is Paddle + * format + */ + BasicVSR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + /// model name contained BasicVSR + std::string ModelName() const override { return "BasicVSR"; } +}; + +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.cc b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.cc new file mode 100755 index 0000000000..dc2905a3e7 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.cc @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
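// A short sketch of constructing the BasicVSR wrapper declared above. The
// model directory, the RuntimeOption::UseGpu() call and the Initialized()
// check are illustrative assumptions rather than values required by this
// patch.
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

#include "ultrainfer/vision/sr/ppsr/basicvsr.h"

static bool RunBasicVsr(const std::vector<cv::Mat> &frames,
                        std::vector<cv::Mat> *restored) {
  ultrainfer::RuntimeOption option;
  option.UseGpu();  // assumption: pick a GPU backend from valid_gpu_backends
  ultrainfer::vision::sr::BasicVSR model("BasicVSR/model.pdmodel",
                                         "BasicVSR/model.pdiparams", option);
  if (!model.Initialized()) {
    return false;
  }
  std::vector<cv::Mat> imgs = frames;  // Predict takes a non-const reference
  return model.Predict(imgs, *restored);
}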
+ +#include "ultrainfer/vision/sr/ppsr/edvr.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +EDVR::EDVR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + // unsupported ORT backend + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::PDINFER, Backend::TRT, Backend::ORT}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool EDVR::Postprocess(std::vector &infer_results, + std::vector &results) { + // group to image + // output_shape is [b, n, c, h, w] n = frame_nums b=1(default) + // b and n is dependence export model shape + // see + // https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/tutorials/video_super_resolution.md + auto output_shape = infer_results[0].shape; + // EDVR + int h_ = output_shape[2]; + int w_ = output_shape[3]; + int c_ = output_shape[1]; + int frame_num = 1; + float *out_data = static_cast(infer_results[0].Data()); + cv::Mat temp = cv::Mat::zeros(h_, w_, CV_32FC3); // RGB image + int pix_num = h_ * w_; + int frame_pix_num = pix_num * c_; + for (int frame = 0; frame < frame_num; frame++) { + int index = 0; + for (int h = 0; h < h_; ++h) { + for (int w = 0; w < w_; ++w) { + temp.at(h, w) = { + out_data[2 * pix_num + index + frame_pix_num * frame], + out_data[pix_num + index + frame_pix_num * frame], + out_data[index + frame_pix_num * frame]}; + index += 1; + } + } + // tmp data type is float[0-1.0],convert to uint type + cv::Mat res = cv::Mat::zeros(temp.size(), CV_8UC3); + temp.convertTo(res, CV_8UC3, 255); + results.push_back(res); + } + return true; +} +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.h b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.h new file mode 100755 index 0000000000..cca88716e8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/edvr.h @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
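// EDVR::Postprocess above folds the planar CHW network output back into an
// interleaved cv::Mat that OpenCV can display or write. A standalone sketch
// of the same index arithmetic for a single three-channel frame follows; the
// helper name is illustrative.
#include <opencv2/opencv.hpp>

// `chw` points to 3*h*w floats in [0, 1], stored plane by plane. Reversing
// the plane order below mirrors Postprocess(): preprocessing ran BGR2RGB, so
// swapping the planes back yields a BGR image.
static cv::Mat ChwFloatToBgr8u(const float *chw, int h, int w) {
  cv::Mat temp(h, w, CV_32FC3);
  const int pix_num = h * w;
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      const int idx = y * w + x;
      temp.at<cv::Vec3f>(y, x) = {chw[2 * pix_num + idx], chw[pix_num + idx],
                                  chw[idx]};
    }
  }
  cv::Mat res;
  temp.convertTo(res, CV_8UC3, 255);  // scale [0, 1] floats to 8-bit
  return res;
}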
+ +#pragma once +#include "ultrainfer/vision/sr/ppsr/ppmsvsr.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +class ULTRAINFER_DECL EDVR : public PPMSVSR { +public: + /** + * Set path of model file and configuration file, and the configuration of + * runtime + * @param[in] model_file Path of model file, e.g EDVR/model.pdmodel + * @param[in] params_file Path of parameter file, e.g EDVR/model.pdiparams + * @param[in] custom_option RuntimeOption for inference, the default will use + * cpu, and choose the backend defined in `valid_cpu_backends` + * @param[in] model_format Model format of the loaded model, default is Paddle + * format + */ + EDVR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + /// model name contained EDVR + std::string ModelName() const override { return "EDVR"; } + +private: + bool Postprocess(std::vector &infer_results, + std::vector &results) override; +}; +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/model.h b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/model.h new file mode 100755 index 0000000000..91d3c19b19 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/model.h @@ -0,0 +1,18 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "ultrainfer/vision/sr/ppsr/basicvsr.h" +#include "ultrainfer/vision/sr/ppsr/edvr.h" +#include "ultrainfer/vision/sr/ppsr/ppmsvsr.h" diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.cc b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.cc new file mode 100755 index 0000000000..d60125c939 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.cc @@ -0,0 +1,130 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
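// model.h above is only an umbrella header: including it is enough to choose
// any of the PP-SR wrappers, which all share PPMSVSR::Predict(). A hedged
// sketch of selecting a model by name; the directory layout and the factory
// itself are illustrative assumptions.
#include <memory>
#include <string>

#include "ultrainfer/vision/sr/ppsr/model.h"

static std::unique_ptr<ultrainfer::vision::sr::PPMSVSR> MakeSrModel(
    const std::string &name, const std::string &model_dir,
    const ultrainfer::RuntimeOption &option) {
  using namespace ultrainfer::vision::sr;
  const std::string model = model_dir + "/model.pdmodel";
  const std::string params = model_dir + "/model.pdiparams";
  if (name == "EDVR") {
    return std::unique_ptr<PPMSVSR>(new EDVR(model, params, option));
  }
  if (name == "BasicVSR") {
    return std::unique_ptr<PPMSVSR>(new BasicVSR(model, params, option));
  }
  return std::unique_ptr<PPMSVSR>(new PPMSVSR(model, params, option));
}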
+ +#include "ultrainfer/vision/sr/ppsr/ppmsvsr.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +PPMSVSR::PPMSVSR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + // unsupported ORT backend + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::PDINFER, Backend::TRT, Backend::ORT}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool PPMSVSR::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + mean_ = {0., 0., 0.}; + scale_ = {1., 1., 1.}; + return true; +} + +bool PPMSVSR::Preprocess(Mat *mat, std::vector &output) { + BGR2RGB::Run(mat); + Normalize::Run(mat, mean_, scale_, true); + HWC2CHW::Run(mat); + // Csat float + float *ptr = static_cast(mat->Data()); + size_t size = mat->Width() * mat->Height() * mat->Channels(); + output = std::vector(ptr, ptr + size); + return true; +} + +bool PPMSVSR::Predict(std::vector &imgs, + std::vector &results) { + // Theoretically, the more frame nums there are, the better the result will + // be, but it will lead to a significant increase in memory + int frame_num = imgs.size(); + int rows = imgs[0].rows; + int cols = imgs[0].cols; + int channels = imgs[0].channels(); + std::vector input_tensors; + input_tensors.resize(1); + std::vector all_data_temp; + for (int i = 0; i < frame_num; i++) { + Mat mat(imgs[i]); + std::vector data_temp; + Preprocess(&mat, data_temp); + all_data_temp.insert(all_data_temp.end(), data_temp.begin(), + data_temp.end()); + } + // share memory in order to avoid memory copy, data type must be float32 + input_tensors[0].SetExternalData({1, frame_num, channels, rows, cols}, + FDDataType::FP32, all_data_temp.data()); + input_tensors[0].shape = {1, frame_num, channels, rows, cols}; + input_tensors[0].name = InputInfoOfRuntime(0).name; + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + if (!Postprocess(output_tensors, results)) { + FDERROR << "Failed to post process." 
<< std::endl; + return false; + } + return true; +} + +bool PPMSVSR::Postprocess(std::vector &infer_results, + std::vector &results) { + // group to image + // output_shape is [b, n, c, h, w] n = frame_nums b=1(default) + // b and n is dependence export model shape + // see + // https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/tutorials/video_super_resolution.md + auto output_shape = infer_results[0].shape; + // PP-MSVSR + int h_ = output_shape[3]; + int w_ = output_shape[4]; + int c_ = output_shape[2]; + int frame_num = output_shape[1]; + + float *out_data = static_cast(infer_results[0].Data()); + cv::Mat temp = cv::Mat::zeros(h_, w_, CV_32FC3); // RGB image + int pix_num = h_ * w_; + int frame_pix_num = pix_num * c_; + for (int frame = 0; frame < frame_num; frame++) { + int index = 0; + for (int h = 0; h < h_; ++h) { + for (int w = 0; w < w_; ++w) { + temp.at(h, w) = { + out_data[2 * pix_num + index + frame_pix_num * frame], + out_data[pix_num + index + frame_pix_num * frame], + out_data[index + frame_pix_num * frame]}; + index += 1; + } + } + // tmp data type is float[0-1.0],convert to uint type + cv::Mat res = cv::Mat::zeros(temp.size(), CV_8UC3); + temp.convertTo(res, CV_8UC3, 255); + results.push_back(res); + } + return true; +} +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.h b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.h new file mode 100755 index 0000000000..7a360cb908 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppmsvsr.h @@ -0,0 +1,63 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
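// PPMSVSR::Postprocess above walks a [1, n, c, h, w] float tensor frame by
// frame, while EDVR overrides it for a [1, c, h, w] tensor, which is why the
// two read different shape indices. Both rely on the usual row-major offset
// rule, sketched here for reference:
#include <cstddef>

static inline std::size_t PlanarOffset(int frame, int channel, int y, int x,
                                       int c, int h, int w) {
  // frame * (c*h*w) + channel * (h*w) + y * w + x, matching
  // frame_pix_num * frame + channel * pix_num + index in the code above.
  return ((static_cast<std::size_t>(frame) * c + channel) * h + y) * w + x;
}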
+#pragma once +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { +namespace sr { + +class ULTRAINFER_DECL PPMSVSR : public UltraInferModel { +public: + /** + * Set path of model file and configuration file, and the configuration of + * runtime + * @param[in] model_file Path of model file, e.g PPMSVSR/model.pdmodel + * @param[in] params_file Path of parameter file, e.g PPMSVSR/model.pdiparams + * @param[in] custom_option RuntimeOption for inference, the default will use + * cpu, and choose the backend defined in `valid_cpu_backends` + * @param[in] model_format Model format of the loaded model, default is Paddle + * format + */ + PPMSVSR(const std::string &model_file, const std::string ¶ms_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + /// model name contained PP-MSVSR。 + std::string ModelName() const override { return "PPMSVSR"; } + /** + * get super resolution frame sequence + * @param[in] imgs origin frame sequences + * @param[in] results super resolution frame sequence + * @return true if the prediction successed, otherwise false + */ + virtual bool Predict(std::vector &imgs, + std::vector &results); + +protected: + PPMSVSR(){}; + + virtual bool Initialize(); + + virtual bool Preprocess(Mat *mat, std::vector &output); + + virtual bool Postprocess(std::vector &infer_results, + std::vector &results); + + std::vector mean_; + std::vector scale_; +}; +} // namespace sr +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppsr_pybind.cc b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppsr_pybind.cc new file mode 100755 index 0000000000..23f095fc8f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/ppsr/ppsr_pybind.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
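// A rough sketch of driving PPMSVSR::Predict() declared above over a whole
// video. The cv::VideoCapture/VideoWriter plumbing, the mp4v codec and the
// chunk size of 10 frames are illustrative; the Predict() implementation
// notes that memory grows with the number of frames fed per call.
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

#include "ultrainfer/vision/sr/ppsr/ppmsvsr.h"

static bool SuperResolveVideo(ultrainfer::vision::sr::PPMSVSR *model,
                              const std::string &in_path,
                              const std::string &out_path) {
  cv::VideoCapture cap(in_path);
  if (!cap.isOpened()) {
    return false;
  }
  const double fps = cap.get(cv::CAP_PROP_FPS);
  cv::VideoWriter writer;
  std::vector<cv::Mat> chunk, results;
  cv::Mat frame;
  auto flush = [&]() {
    if (chunk.empty()) {
      return true;
    }
    results.clear();
    if (!model->Predict(chunk, results)) {
      return false;
    }
    for (auto &img : results) {
      if (!writer.isOpened()) {
        writer.open(out_path, cv::VideoWriter::fourcc('m', 'p', '4', 'v'), fps,
                    img.size());
      }
      writer.write(img);
    }
    chunk.clear();
    return true;
  };
  while (cap.read(frame)) {
    chunk.push_back(frame.clone());
    // Small chunks keep the [1, n, c, h, w] input tensor bounded in memory.
    if (chunk.size() == 10 && !flush()) {
      return false;
    }
  }
  return flush();
}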
+#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPSR(pybind11::module &m) { + pybind11::class_(m, "PPMSVSR") + .def(pybind11::init()) + .def("predict", + [](vision::sr::PPMSVSR &self, std::vector &datas) { + std::vector inputs; + for (auto &data : datas) { + auto mat = PyArrayToCvMat(data); + inputs.push_back(mat); + } + std::vector res; + std::vector res_pyarray; + self.Predict(inputs, res); + for (auto &img : res) { + auto ret = pybind11::array_t( + {img.rows, img.cols, img.channels()}, img.data); + res_pyarray.push_back(ret); + } + return res_pyarray; + }); + pybind11::class_(m, "EDVR") + .def(pybind11::init()) + .def("predict", + [](vision::sr::EDVR &self, std::vector &datas) { + std::vector inputs; + for (auto &data : datas) { + auto mat = PyArrayToCvMat(data); + inputs.push_back(mat); + } + std::vector res; + std::vector res_pyarray; + self.Predict(inputs, res); + for (auto &img : res) { + auto ret = pybind11::array_t( + {img.rows, img.cols, img.channels()}, img.data); + res_pyarray.push_back(ret); + } + return res_pyarray; + }); + pybind11::class_(m, "BasicVSR") + .def(pybind11::init()) + .def("predict", + [](vision::sr::BasicVSR &self, std::vector &datas) { + std::vector inputs; + for (auto &data : datas) { + auto mat = PyArrayToCvMat(data); + inputs.push_back(mat); + } + std::vector res; + std::vector res_pyarray; + self.Predict(inputs, res); + for (auto &img : res) { + auto ret = pybind11::array_t( + {img.rows, img.cols, img.channels()}, img.data); + res_pyarray.push_back(ret); + } + return res_pyarray; + }); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/sr/sr_pybind.cc b/libs/ultrainfer/ultrainfer/vision/sr/sr_pybind.cc new file mode 100755 index 0000000000..3ae1a47453 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/sr/sr_pybind.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPSR(pybind11::module &m); + +void BindSR(pybind11::module &m) { + auto sr_module = m.def_submodule("sr", "sr(super resolution) submodule"); + BindPPSR(sr_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.cc new file mode 100755 index 0000000000..db32a5701b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.cc @@ -0,0 +1,389 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The code is based on: +// https://github.com/gatagat/lap/blob/master/lap/lapjv.cpp +// Ths copyright of gatagat/lap is as follows: +// MIT License + +#include +#include +#include + +#include "ultrainfer/vision/tracking/pptracking/lapjv.h" + +namespace ultrainfer { +namespace vision { +namespace tracking { + +/** Column-reduction and reduction transfer for a dense cost matrix. + */ +int _ccrrt_dense(const int n, float *cost[], int *free_rows, int *x, int *y, + float *v) { + int n_free_rows; + bool *unique; + + for (int i = 0; i < n; i++) { + x[i] = -1; + v[i] = LARGE; + y[i] = 0; + } + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + const float c = cost[i][j]; + if (c < v[j]) { + v[j] = c; + y[j] = i; + } + } + } + NEW(unique, bool, n); + memset(unique, TRUE, n); + { + int j = n; + do { + j--; + const int i = y[j]; + if (x[i] < 0) { + x[i] = j; + } else { + unique[i] = FALSE; + y[j] = -1; + } + } while (j > 0); + } + n_free_rows = 0; + for (int i = 0; i < n; i++) { + if (x[i] < 0) { + free_rows[n_free_rows++] = i; + } else if (unique[i]) { + const int j = x[i]; + float min = LARGE; + for (int j2 = 0; j2 < n; j2++) { + if (j2 == static_cast(j)) { + continue; + } + const float c = cost[i][j2] - v[j2]; + if (c < min) { + min = c; + } + } + v[j] -= min; + } + } + FREE(unique); + return n_free_rows; +} + +/** Augmenting row reduction for a dense cost matrix. + */ +int _carr_dense(const int n, float *cost[], const int n_free_rows, + int *free_rows, int *x, int *y, float *v) { + int current = 0; + int new_free_rows = 0; + int rr_cnt = 0; + while (current < n_free_rows) { + int i0; + int j1, j2; + float v1, v2, v1_new; + bool v1_lowers; + + rr_cnt++; + const int free_i = free_rows[current++]; + j1 = 0; + v1 = cost[free_i][0] - v[0]; + j2 = -1; + v2 = LARGE; + for (int j = 1; j < n; j++) { + const float c = cost[free_i][j] - v[j]; + if (c < v2) { + if (c >= v1) { + v2 = c; + j2 = j; + } else { + v2 = v1; + v1 = c; + j2 = j1; + j1 = j; + } + } + } + i0 = y[j1]; + v1_new = v[j1] - (v2 - v1); + v1_lowers = v1_new < v[j1]; + if (rr_cnt < current * n) { + if (v1_lowers) { + v[j1] = v1_new; + } else if (i0 >= 0 && j2 >= 0) { + j1 = j2; + i0 = y[j2]; + } + if (i0 >= 0) { + if (v1_lowers) { + free_rows[--current] = i0; + } else { + free_rows[new_free_rows++] = i0; + } + } + } else { + if (i0 >= 0) { + free_rows[new_free_rows++] = i0; + } + } + x[free_i] = j1; + y[j1] = free_i; + } + return new_free_rows; +} + +/** Find columns with minimum d[j] and put them on the SCAN list. + */ +int _find_dense(const int n, int lo, float *d, int *cols, int *y) { + int hi = lo + 1; + float mind = d[cols[lo]]; + for (int k = hi; k < n; k++) { + int j = cols[k]; + if (d[j] <= mind) { + if (d[j] < mind) { + hi = lo; + mind = d[j]; + } + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + return hi; +} + +// Scan all columns in TODO starting from arbitrary column in SCAN +// and try to decrease d of the TODO columns using the SCAN column. 
+int _scan_dense(const int n, float *cost[], int *plo, int *phi, float *d, + int *cols, int *pred, int *y, float *v) { + int lo = *plo; + int hi = *phi; + float h, cred_ij; + + while (lo != hi) { + int j = cols[lo++]; + const int i = y[j]; + const float mind = d[j]; + h = cost[i][j] - v[j] - mind; + // For all columns in TODO + for (int k = hi; k < n; k++) { + j = cols[k]; + cred_ij = cost[i][j] - v[j] - h; + if (cred_ij < d[j]) { + d[j] = cred_ij; + pred[j] = i; + if (cred_ij == mind) { + if (y[j] < 0) { + return j; + } + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + } + } + *plo = lo; + *phi = hi; + return -1; +} + +/** Single iteration of modified Dijkstra shortest path algorithm as explained + * in the JV paper. + * + * This is a dense matrix version. + * + * \return The closest free column index. + */ +int find_path_dense(const int n, float *cost[], const int start_i, int *y, + float *v, int *pred) { + int lo = 0, hi = 0; + int final_j = -1; + int n_ready = 0; + int *cols; + float *d; + + NEW(cols, int, n); + NEW(d, float, n); + + for (int i = 0; i < n; i++) { + cols[i] = i; + pred[i] = start_i; + d[i] = cost[start_i][i] - v[i]; + } + while (final_j == -1) { + // No columns left on the SCAN list. + if (lo == hi) { + n_ready = lo; + hi = _find_dense(n, lo, d, cols, y); + for (int k = lo; k < hi; k++) { + const int j = cols[k]; + if (y[j] < 0) { + final_j = j; + } + } + } + if (final_j == -1) { + final_j = _scan_dense(n, cost, &lo, &hi, d, cols, pred, y, v); + } + } + + { + const float mind = d[cols[lo]]; + for (int k = 0; k < n_ready; k++) { + const int j = cols[k]; + v[j] += d[j] - mind; + } + } + + FREE(cols); + FREE(d); + + return final_j; +} + +/** Augment for a dense cost matrix. + */ +int _ca_dense(const int n, float *cost[], const int n_free_rows, int *free_rows, + int *x, int *y, float *v) { + int *pred; + + NEW(pred, int, n); + + for (int *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) { + int i = -1, j; + int k = 0; + + j = find_path_dense(n, cost, *pfree_i, y, v, pred); + while (i != *pfree_i) { + i = pred[j]; + y[j] = i; + SWAP_INDICES(j, x[i]); + k++; + } + } + FREE(pred); + return 0; +} + +/** Solve dense sparse LAP. + */ +int lapjv_internal(const cv::Mat &cost, const bool extend_cost, + const float cost_limit, int *x, int *y) { + int n_rows = cost.rows; + int n_cols = cost.cols; + int n; + if (n_rows == n_cols) { + n = n_rows; + } else if (!extend_cost) { + throw std::invalid_argument( + "Square cost array expected. 
If cost is intentionally non-square, pass " + "extend_cost=True."); + } + + // Get extend cost + if (extend_cost || cost_limit < LARGE) { + n = n_rows + n_cols; + } + cv::Mat cost_expand(n, n, CV_32F); + float expand_value; + if (cost_limit < LARGE) { + expand_value = cost_limit / 2; + } else { + double max_v; + minMaxLoc(cost, nullptr, &max_v); + expand_value = static_cast(max_v) + 1.; + } + + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + cost_expand.at(i, j) = expand_value; + if (i >= n_rows && j >= n_cols) { + cost_expand.at(i, j) = 0; + } else if (i < n_rows && j < n_cols) { + cost_expand.at(i, j) = cost.at(i, j); + } + } + } + + // Convert Mat to pointer array + float **cost_ptr; + NEW(cost_ptr, float *, n); + for (int i = 0; i < n; ++i) { + NEW(cost_ptr[i], float, n); + } + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + cost_ptr[i][j] = cost_expand.at(i, j); + } + } + + int ret; + int *free_rows; + float *v; + int *x_c; + int *y_c; + + NEW(free_rows, int, n); + NEW(v, float, n); + NEW(x_c, int, n); + NEW(y_c, int, n); + + ret = _ccrrt_dense(n, cost_ptr, free_rows, x_c, y_c, v); + int i = 0; + while (ret > 0 && i < 2) { + ret = _carr_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v); + i++; + } + if (ret > 0) { + ret = _ca_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v); + } + FREE(v); + FREE(free_rows); + for (int i = 0; i < n; ++i) { + FREE(cost_ptr[i]); + } + FREE(cost_ptr); + if (ret != 0) { + if (ret == -1) { + throw "Out of memory."; + } + throw "Unknown error (lapjv_internal)"; + } + // Get output of x, y, opt + for (int i = 0; i < n; ++i) { + if (i < n_rows) { + x[i] = x_c[i]; + if (x[i] >= n_cols) { + x[i] = -1; + } + } + if (i < n_cols) { + y[i] = y_c[i]; + if (y[i] >= n_rows) { + y[i] = -1; + } + } + } + + FREE(x_c); + FREE(y_c); + return ret; +} + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.h b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.h new file mode 100755 index 0000000000..93dbbb531d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/lapjv.h @@ -0,0 +1,62 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
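// A small sketch of how lapjv_internal() above is typically invoked by the
// tracker: build a float cost matrix (for example 1 - IoU), pass
// extend_cost=true so non-square inputs and the cost limit both work, and
// read the assignments back from x and y, where -1 means "left unassigned".
// The matrix values below are made up for illustration.
#include <vector>

#include <opencv2/core.hpp>

#include "ultrainfer/vision/tracking/pptracking/lapjv.h"

static void LapjvExample() {
  cv::Mat cost = (cv::Mat_<float>(2, 3) << 0.1f, 0.9f, 0.8f,
                                           0.7f, 0.2f, 0.9f);
  std::vector<int> x(cost.rows, -1);  // x[i]: column matched to row i
  std::vector<int> y(cost.cols, -1);  // y[j]: row matched to column j
  ultrainfer::vision::tracking::lapjv_internal(
      cost, /*extend_cost=*/true, /*cost_limit=*/0.7f, x.data(), y.data());
  // Expected here: x == {0, 1} and y == {0, 1, -1}; column 2 stays unmatched.
}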
+ +// The code is based on: +// https://github.com/gatagat/lap/blob/master/lap/lapjv.h +// Ths copyright of gatagat/lap is as follows: +// MIT License + +#pragma once +#define LARGE 1000000 + +#if !defined TRUE +#define TRUE 1 +#endif +#if !defined FALSE +#define FALSE 0 +#endif + +#define NEW(x, t, n) \ + if ((x = reinterpret_cast(malloc(sizeof(t) * (n)))) == 0) { \ + return -1; \ + } +#define FREE(x) \ + if (x != 0) { \ + free(x); \ + x = 0; \ + } +#define SWAP_INDICES(a, b) \ + { \ + int_t _temp_index = a; \ + a = b; \ + b = _temp_index; \ + } +#include + +namespace ultrainfer { +namespace vision { +namespace tracking { + +typedef signed int int_t; +typedef unsigned int uint_t; +typedef double cost_t; +typedef char boolean; +typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; + +int lapjv_internal(const cv::Mat &cost, const bool extend_cost, + const float cost_limit, int *x, int *y); + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.cc new file mode 100755 index 0000000000..ffd6680499 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.cc @@ -0,0 +1,169 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/tracking/pptracking/letter_box_resize.h" +#include "ultrainfer/vision/common/processors/transform.h" + +namespace ultrainfer { +namespace vision { + +bool LetterBoxResize::ImplByOpenCV(Mat *mat) { + if (mat->Channels() != color_.size()) { + FDERROR << "LetterBoxResize: Require input channels equals to size of " + "color value, " + "but now channels = " + << mat->Channels() + << ", the size of color values = " << color_.size() << "." 
+ << std::endl; + return false; + } + // generate scale_factor + int origin_w = mat->Width(); + int origin_h = mat->Height(); + int target_h = target_size_[0]; + int target_w = target_size_[1]; + float ratio_h = static_cast(target_h) / static_cast(origin_h); + float ratio_w = static_cast(target_w) / static_cast(origin_w); + float resize_scale = std::min(ratio_h, ratio_w); + // get_resized_shape + int new_shape_w = std::round(origin_w * resize_scale); + int new_shape_h = std::round(origin_h * resize_scale); + // calculate pad + float padw = (target_size_[1] - new_shape_w) / 2.; + float padh = (target_size_[0] - new_shape_h) / 2.; + int top = std::round(padh - 0.1); + int bottom = std::round(padh + 0.1); + int left = std::round(padw - 0.1); + int right = std::round(padw + 0.1); + Resize::Run(mat, new_shape_w, new_shape_h, -1.0, -1.0, 3, false); + Pad::Run(mat, top, bottom, left, right, color_); + return true; +} + +#ifdef ENABLE_FLYCV +bool LetterBoxResize::ImplByFlyCV(Mat *mat) { + if (mat->Channels() != color_.size()) { + FDERROR << "LetterBoxResize: Require input channels equals to size of " + "color value, " + "but now channels = " + << mat->Channels() + << ", the size of color values = " << color_.size() << "." + << std::endl; + return false; + } + // generate scale_factor + int origin_w = mat->Width(); + int origin_h = mat->Height(); + int target_h = target_size_[0]; + int target_w = target_size_[1]; + float ratio_h = static_cast(target_h) / static_cast(origin_h); + float ratio_w = static_cast(target_w) / static_cast(origin_w); + float resize_scale = std::min(ratio_h, ratio_w); + // get_resized_shape + int new_shape_w = std::round(origin_w * resize_scale); + int new_shape_h = std::round(origin_h * resize_scale); + // calculate pad + float padw = (target_size_[1] - new_shape_w) / 2.; + float padh = (target_size_[0] - new_shape_h) / 2.; + int top = std::round(padh - 0.1); + int bottom = std::round(padh + 0.1); + int left = std::round(padw - 0.1); + int right = std::round(padw + 0.1); + Resize::Run(mat, new_shape_w, new_shape_h, -1.0, -1.0, 3, false, + ProcLib::FLYCV); + Pad::Run(mat, top, bottom, left, right, color_, ProcLib::FLYCV); + return true; +} +#endif + +#ifdef ENABLE_CVCUDA +bool LetterBoxResize::ImplByCvCuda(Mat *mat) { + if (mat->Channels() != color_.size()) { + FDERROR << "LetterBoxResize: Require input channels equals to size of " + "color value, " + "but now channels = " + << mat->Channels() + << ", the size of color values = " << color_.size() << "." 
+ << std::endl; + return false; + } + // generate scale_factor + int origin_w = mat->Width(); + int origin_h = mat->Height(); + int target_h = target_size_[0]; + int target_w = target_size_[1]; + float ratio_h = static_cast(target_h) / static_cast(origin_h); + float ratio_w = static_cast(target_w) / static_cast(origin_w); + float resize_scale = std::min(ratio_h, ratio_w); + // get_resized_shape + int new_shape_w = std::round(origin_w * resize_scale); + int new_shape_h = std::round(origin_h * resize_scale); + // calculate pad + float padw = (target_size_[1] - new_shape_w) / 2.; + float padh = (target_size_[0] - new_shape_h) / 2.; + int top = std::round(padh - 0.1); + int bottom = std::round(padh + 0.1); + int left = std::round(padw - 0.1); + int right = std::round(padw + 0.1); + Resize::Run(mat, new_shape_w, new_shape_h, -1.0, -1.0, 3, false, + ProcLib::CVCUDA); + Pad::Run(mat, top, bottom, left, right, color_, ProcLib::CVCUDA); + return true; +} +#endif + +#ifdef ENABLE_CUDA +bool LetterBoxResize::ImplByCuda(Mat *mat) { + if (mat->Channels() != color_.size()) { + FDERROR << "LetterBoxResize: Require input channels equals to size of " + "color value, " + "but now channels = " + << mat->Channels() + << ", the size of color values = " << color_.size() << "." + << std::endl; + return false; + } + // generate scale_factor + int origin_w = mat->Width(); + int origin_h = mat->Height(); + int target_h = target_size_[0]; + int target_w = target_size_[1]; + float ratio_h = static_cast(target_h) / static_cast(origin_h); + float ratio_w = static_cast(target_w) / static_cast(origin_w); + float resize_scale = std::min(ratio_h, ratio_w); + // get_resized_shape + int new_shape_w = std::round(origin_w * resize_scale); + int new_shape_h = std::round(origin_h * resize_scale); + // calculate pad + float padw = (target_size_[1] - new_shape_w) / 2.; + float padh = (target_size_[0] - new_shape_h) / 2.; + int top = std::round(padh - 0.1); + int bottom = std::round(padh + 0.1); + int left = std::round(padw - 0.1); + int right = std::round(padw + 0.1); + Resize::Run(mat, new_shape_w, new_shape_h, -1.0, -1.0, 3, false, + ProcLib::CUDA); + Pad::Run(mat, top, bottom, left, right, color_, ProcLib::CUDA); + return true; +} +#endif + +bool LetterBoxResize::Run(Mat *mat, const std::vector &target_size, + const std::vector &color, ProcLib lib) { + auto l = LetterBoxResize(target_size, color); + return l(mat, lib); +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.h b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.h new file mode 100755 index 0000000000..634265dc10 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/letter_box_resize.h @@ -0,0 +1,52 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
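// The ImplBy*() overloads above all share the same geometry: scale by the
// smaller of the two ratios, then pad out to the target. A standalone sketch
// of that arithmetic (plain math, no ultrainfer types); for example a
// 1280x720 (WxH) frame letter-boxed into a 576x320 (WxH) target becomes a
// 569x320 resize with 3 px of left padding and 4 px of right padding.
#include <algorithm>
#include <cmath>

struct LetterBoxPlan {
  int new_w, new_h, top, bottom, left, right;
};

static LetterBoxPlan PlanLetterBox(int origin_w, int origin_h, int target_h,
                                   int target_w) {
  const float scale = std::min(static_cast<float>(target_h) / origin_h,
                               static_cast<float>(target_w) / origin_w);
  LetterBoxPlan p;
  p.new_w = static_cast<int>(std::round(origin_w * scale));
  p.new_h = static_cast<int>(std::round(origin_h * scale));
  const float padw = (target_w - p.new_w) / 2.0f;
  const float padh = (target_h - p.new_h) / 2.0f;
  // The +/- 0.1 split mirrors the implementation: an odd pixel of padding
  // goes to the bottom/right side.
  p.top = static_cast<int>(std::round(padh - 0.1f));
  p.bottom = static_cast<int>(std::round(padh + 0.1f));
  p.left = static_cast<int>(std::round(padw - 0.1f));
  p.right = static_cast<int>(std::round(padw + 0.1f));
  return p;
}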
+ +#pragma once + +#include "ultrainfer/vision/common/processors/base.h" + +namespace ultrainfer { +namespace vision { + +class LetterBoxResize : public Processor { +public: + LetterBoxResize(const std::vector &target_size, + const std::vector &color) { + target_size_ = target_size; + color_ = color; + } + + std::string Name() override { return "LetterBoxResize"; } + bool ImplByOpenCV(Mat *mat) override; +#ifdef ENABLE_FLYCV + bool ImplByFlyCV(FDMat *mat) override; +#endif +#ifdef ENABLE_CVCUDA + virtual bool ImplByCvCuda(FDMat *mat) override; +#endif + +#ifdef ENABLE_CUDA + virtual bool ImplByCuda(FDMat *mat); +#endif + + static bool Run(Mat *mat, const std::vector &target_size, + const std::vector &color, + ProcLib lib = ProcLib::DEFAULT); + +private: + std::vector target_size_; + std::vector color_; +}; +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.cc new file mode 100755 index 0000000000..b7d2611ead --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.cc @@ -0,0 +1,316 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/tracking/pptracking/model.h" + +#include "ultrainfer/vision/tracking/pptracking/letter_box_resize.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { +namespace tracking { + +PPTracking::PPTracking(const std::string &model_file, + const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option, + const ModelFormat &model_format) { + config_file_ = config_file; + valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool PPTracking::BuildPreprocessPipelineFromConfig() { + processors_.clear(); + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file_); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file_ + << ", maybe you should check this file." << std::endl; + return false; + } + + // Get draw_threshold for visualization + if (cfg["draw_threshold"].IsDefined()) { + draw_threshold_ = cfg["draw_threshold"].as(); + } else { + FDERROR << "Please set draw_threshold." << std::endl; + return false; + } + // Get config for tracker + if (cfg["tracker"].IsDefined()) { + if (cfg["tracker"]["conf_thres"].IsDefined()) { + conf_thresh_ = cfg["tracker"]["conf_thres"].as(); + } else { + std::cerr << "Please set conf_thres in tracker." 
<< std::endl; + return false; + } + if (cfg["tracker"]["min_box_area"].IsDefined()) { + min_box_area_ = cfg["tracker"]["min_box_area"].as(); + } + if (cfg["tracker"]["tracked_thresh"].IsDefined()) { + tracked_thresh_ = cfg["tracker"]["tracked_thresh"].as(); + } + } + + processors_.push_back(std::make_shared()); + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "Resize") { + bool keep_ratio = op["keep_ratio"].as(); + auto target_size = op["target_size"].as>(); + int interp = op["interp"].as(); + FDASSERT(target_size.size() == 2, + "Require size of target_size be 2, but now it's %lu.", + target_size.size()); + if (!keep_ratio) { + int width = target_size[1]; + int height = target_size[0]; + processors_.push_back( + std::make_shared(width, height, -1.0, -1.0, interp, false)); + } else { + int min_target_size = std::min(target_size[0], target_size[1]); + int max_target_size = std::max(target_size[0], target_size[1]); + std::vector max_size; + if (max_target_size > 0) { + max_size.push_back(max_target_size); + max_size.push_back(max_target_size); + } + processors_.push_back(std::make_shared( + min_target_size, interp, true, max_size)); + } + + } else if (op_name == "LetterBoxResize") { + auto target_size = op["target_size"].as>(); + FDASSERT(target_size.size() == 2, + "Require size of target_size be 2, but now it's %lu.", + target_size.size()); + std::vector color{127.0f, 127.0f, 127.0f}; + if (op["fill_value"].IsDefined()) { + color = op["fill_value"].as>(); + } + processors_.push_back( + std::make_shared(target_size, color)); + } else if (op_name == "NormalizeImage") { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = true; + if (op["is_scale"]) { + is_scale = op["is_scale"].as(); + } + std::string norm_type = "mean_std"; + if (op["norm_type"]) { + norm_type = op["norm_type"].as(); + } + if (norm_type != "mean_std") { + std::fill(mean.begin(), mean.end(), 0.0); + std::fill(std.begin(), std.end(), 1.0); + } + processors_.push_back(std::make_shared(mean, std, is_scale)); + } else if (op_name == "Permute") { + // Do nothing, do permute as the last operation + continue; + // processors_.push_back(std::make_shared()); + } else if (op_name == "Pad") { + auto size = op["size"].as>(); + auto value = op["fill_value"].as>(); + processors_.push_back(std::make_shared("float")); + processors_.push_back( + std::make_shared(size[1], size[0], value)); + } else if (op_name == "PadStride") { + auto stride = op["stride"].as(); + processors_.push_back( + std::make_shared(stride, std::vector(3, 0))); + } else { + FDERROR << "Unexcepted preprocess operator: " << op_name << "." + << std::endl; + return false; + } + } + processors_.push_back(std::make_shared()); + + FuseTransforms(&processors_); + return true; +} + +bool PPTracking::Initialize() { + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." + << std::endl; + return false; + } + if (!InitRuntime()) { + FDERROR << "Failed to initialize ultrainfer backend." << std::endl; + return false; + } + // create JDETracker instance + jdeTracker_ = std::unique_ptr(new JDETracker); + return true; +} + +bool PPTracking::Predict(cv::Mat *img, MOTResult *result) { + Mat mat(*img); + std::vector input_tensors; + + if (!Preprocess(&mat, &input_tensors)) { + FDERROR << "Failed to preprocess input image." 
<< std::endl; + return false; + } + std::vector output_tensors; + if (!Infer(input_tensors, &output_tensors)) { + FDERROR << "Failed to inference." << std::endl; + return false; + } + + if (!Postprocess(output_tensors, result)) { + FDERROR << "Failed to post process." << std::endl; + return false; + } + return true; +} + +bool PPTracking::Preprocess(Mat *mat, std::vector *outputs) { + int origin_w = mat->Width(); + int origin_h = mat->Height(); + + for (size_t i = 0; i < processors_.size(); ++i) { + if (!(*(processors_[i].get()))(mat)) { + FDERROR << "Failed to process image data in " << processors_[i]->Name() + << "." << std::endl; + return false; + } + } + + // LetterBoxResize(mat); + // Normalize::Run(mat,mean_,scale_,is_scale_); + // HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + + outputs->resize(3); + // image_shape + (*outputs)[0].Allocate({1, 2}, FDDataType::FP32, InputInfoOfRuntime(0).name); + float *shape = static_cast((*outputs)[0].MutableData()); + shape[0] = mat->Height(); + shape[1] = mat->Width(); + // image + (*outputs)[1].name = InputInfoOfRuntime(1).name; + mat->ShareWithTensor(&((*outputs)[1])); + (*outputs)[1].ExpandDim(0); + // scale + (*outputs)[2].Allocate({1, 2}, FDDataType::FP32, InputInfoOfRuntime(2).name); + float *scale = static_cast((*outputs)[2].MutableData()); + scale[0] = mat->Height() * 1.0 / origin_h; + scale[1] = mat->Width() * 1.0 / origin_w; + return true; +} + +void FilterDets(const float conf_thresh, const cv::Mat &dets, + std::vector *index) { + for (int i = 0; i < dets.rows; ++i) { + float score = *dets.ptr(i, 4); + if (score > conf_thresh) { + index->push_back(i); + } + } +} + +bool PPTracking::Postprocess(std::vector &infer_result, + MOTResult *result) { + auto bbox_shape = infer_result[0].shape; + auto bbox_data = static_cast(infer_result[0].Data()); + + auto emb_shape = infer_result[1].shape; + auto emb_data = static_cast(infer_result[1].Data()); + + cv::Mat dets(bbox_shape[0], 6, CV_32FC1, bbox_data); + cv::Mat emb(bbox_shape[0], emb_shape[1], CV_32FC1, emb_data); + + result->Clear(); + std::vector tracks; + std::vector valid; + FilterDets(conf_thresh_, dets, &valid); + cv::Mat new_dets, new_emb; + for (int i = 0; i < valid.size(); ++i) { + new_dets.push_back(dets.row(valid[i])); + new_emb.push_back(emb.row(valid[i])); + } + jdeTracker_->update(new_dets, new_emb, &tracks); + if (tracks.size() == 0) { + std::array box = { + int(*dets.ptr(0, 0)), int(*dets.ptr(0, 1)), + int(*dets.ptr(0, 2)), int(*dets.ptr(0, 3))}; + result->boxes.push_back(box); + result->ids.push_back(1); + result->scores.push_back(*dets.ptr(0, 4)); + } else { + std::vector::iterator titer; + for (titer = tracks.begin(); titer != tracks.end(); ++titer) { + if (titer->score < tracked_thresh_) { + continue; + } else { + float w = titer->ltrb[2] - titer->ltrb[0]; + float h = titer->ltrb[3] - titer->ltrb[1]; + bool vertical = w / h > 1.6; + float area = w * h; + if (area > min_box_area_ && !vertical) { + std::array box = {int(titer->ltrb[0]), int(titer->ltrb[1]), + int(titer->ltrb[2]), int(titer->ltrb[3])}; + result->boxes.push_back(box); + result->ids.push_back(titer->id); + result->scores.push_back(titer->score); + } + } + } + } + if (!is_record_trail_) + return true; + int nums = result->boxes.size(); + for (int i = 0; i < nums; i++) { + float center_x = (result->boxes[i][0] + result->boxes[i][2]) / 2; + float center_y = (result->boxes[i][1] + result->boxes[i][3]) / 2; + int id = result->ids[i]; + recorder_->Add(id, {int(center_x), int(center_y)}); + } + return true; +} + +void 
PPTracking::BindRecorder(TrailRecorder *recorder) { + recorder_ = recorder; + is_record_trail_ = true; +} + +void PPTracking::UnbindRecorder() { + is_record_trail_ = false; + std::map>>::iterator iter; + for (iter = recorder_->records.begin(); iter != recorder_->records.end(); + iter++) { + iter->second.clear(); + iter->second.shrink_to_fit(); + } + recorder_->records.clear(); + std::map>>().swap(recorder_->records); + recorder_ = nullptr; +} + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.h b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.h new file mode 100755 index 0000000000..10e08d970b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/model.h @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/common/processors/transform.h" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/tracking/pptracking/tracker.h" +#include + +namespace ultrainfer { +namespace vision { +namespace tracking { +struct TrailRecorder { + std::map>> records; + void Add(int id, const std::array &record); +}; + +inline void TrailRecorder::Add(int id, const std::array &record) { + auto iter = records.find(id); + if (iter != records.end()) { + auto trail = records[id]; + trail.push_back(record); + records[id] = trail; + } else { + records[id] = {record}; + } +} + +class ULTRAINFER_DECL PPTracking : public UltraInferModel { +public: + /** \brief Set path of model file and configuration file, and the + * configuration of runtime + * + * \param[in] model_file Path of model file, e.g pptracking/model.pdmodel + * \param[in] params_file Path of parameter file, e.g + * pptracking/model.pdiparams, if the model format is ONNX, this parameter + * will be ignored \param[in] config_file Path of configuration file for + * deployment, e.g pptracking/infer_cfg.yml \param[in] custom_option + * RuntimeOption for inference, the default will use cpu, and choose the + * backend defined in `valid_cpu_backends` \param[in] model_format Model + * format of the loaded model, default is Paddle format + */ + PPTracking(const std::string &model_file, const std::string ¶ms_file, + const std::string &config_file, + const RuntimeOption &custom_option = RuntimeOption(), + const ModelFormat &model_format = ModelFormat::PADDLE); + + /// Get model's name + std::string ModelName() const override { return "pptracking"; } + + /** \brief Predict the detection result for an input image(consecutive) + * + * \param[in] im The input image data which is consecutive frame, comes from + * imread() or videoCapture.read() \param[in] result The output tracking + * result will be writen to this structure \return true if the prediction + * successed, otherwise false + */ + virtual bool Predict(cv::Mat *img, MOTResult *result); + /** 
\brief bind tracking trail struct + * + * \param[in] recorder The MOT trail will record the trail of object + */ + void BindRecorder(TrailRecorder *recorder); + /** \brief cancel binding and clear trail information + */ + void UnbindRecorder(); + +private: + bool BuildPreprocessPipelineFromConfig(); + + bool Initialize(); + + bool Preprocess(Mat *img, std::vector *outputs); + + bool Postprocess(std::vector &infer_result, MOTResult *result); + + std::vector> processors_; + std::string config_file_; + float draw_threshold_; + float conf_thresh_; + float tracked_thresh_; + float min_box_area_; + bool is_record_trail_ = false; + std::unique_ptr jdeTracker_; + TrailRecorder *recorder_ = nullptr; +}; + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/pptracking_pybind.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/pptracking_pybind.cc new file mode 100755 index 0000000000..52c0b67fb5 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/pptracking_pybind.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { +void BindPPTracking(pybind11::module &m) { + + pybind11::class_(m, "TrailRecorder") + .def(pybind11::init<>()) + .def_readwrite("records", &vision::tracking::TrailRecorder::records) + .def("add", &vision::tracking::TrailRecorder::Add); + pybind11::class_(m, + "PPTracking") + .def(pybind11::init()) + .def("predict", + [](vision::tracking::PPTracking &self, pybind11::array &data) { + auto mat = PyArrayToCvMat(data); + vision::MOTResult res; + self.Predict(&mat, &res); + return res; + }) + .def("bind_recorder", &vision::tracking::PPTracking::BindRecorder) + .def("unbind_recorder", &vision::tracking::PPTracking::UnbindRecorder); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.cc new file mode 100755 index 0000000000..0944cafa74 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.cc @@ -0,0 +1,297 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
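+
+// Overview of the association cascade implemented in JDETracker::update()
+// below (a descriptive summary of this file, with an illustrative example):
+//   1. Activated tracks (tracked + lost) are matched to detections with a
+//      fused cost, cost = lambda * d_embedding + (1 - lambda) * d_mahalanobis,
+//      gated to FLT_MAX whenever d_mahalanobis > chi2inv95[4]; assignment
+//      threshold 0.7.
+//   2. Remaining tracked trajectories are matched to the leftover detections
+//      by IoU distance with threshold 0.5.
+//   3. Unconfirmed trajectories are matched to what is still left by IoU
+//      distance with threshold 0.7.
+//   Unmatched detections with score >= det_thresh start new trajectories, and
+//   lost trajectories older than max_lost_time frames are removed.
+// Example of the fused cost with lambda = 0.98: d_embedding = 0.30 and
+// d_mahalanobis = 5.0 give 0.98 * 0.30 + 0.02 * 5.0 = 0.394; if
+// d_mahalanobis exceeded chi2inv95[4] (about 9.49), the entry would be set to
+// FLT_MAX and the pair could not be matched.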
+ +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/jdetracker.cpp +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#include +#include +#include +#include + +#include "ultrainfer/vision/tracking/pptracking/lapjv.h" +#include "ultrainfer/vision/tracking/pptracking/tracker.h" + +#define mat2vec4f(m) \ + cv::Vec4f(*m.ptr(0, 0), *m.ptr(0, 1), *m.ptr(0, 2), \ + *m.ptr(0, 3)) + +namespace ultrainfer { +namespace vision { +namespace tracking { + +static std::map chi2inv95 = { + {1, 3.841459f}, {2, 5.991465f}, {3, 7.814728f}, + {4, 9.487729f}, {5, 11.070498f}, {6, 12.591587f}, + {7, 14.067140f}, {8, 15.507313f}, {9, 16.918978f}}; + +JDETracker::JDETracker() + : timestamp(0), max_lost_time(30), lambda(0.98f), det_thresh(0.3f) {} + +bool JDETracker::update(const cv::Mat &dets, const cv::Mat &emb, + std::vector *tracks) { + ++timestamp; + TrajectoryPool candidates(dets.rows); + for (int i = 0; i < dets.rows; ++i) { + float score = *dets.ptr(i, 1); + const cv::Mat <rb_ = dets(cv::Rect(2, i, 4, 1)); + cv::Vec4f ltrb = mat2vec4f(ltrb_); + const cv::Mat &embedding = emb(cv::Rect(0, i, emb.cols, 1)); + candidates[i] = Trajectory(ltrb, score, embedding); + } + + TrajectoryPtrPool tracked_trajectories; + TrajectoryPtrPool unconfirmed_trajectories; + for (size_t i = 0; i < this->tracked_trajectories.size(); ++i) { + if (this->tracked_trajectories[i].is_activated) + tracked_trajectories.push_back(&this->tracked_trajectories[i]); + else + unconfirmed_trajectories.push_back(&this->tracked_trajectories[i]); + } + + TrajectoryPtrPool trajectory_pool = + tracked_trajectories + &(this->lost_trajectories); + + for (size_t i = 0; i < trajectory_pool.size(); ++i) + trajectory_pool[i]->predict(); + + Match matches; + std::vector mismatch_row; + std::vector mismatch_col; + + cv::Mat cost = motion_distance(trajectory_pool, candidates); + linear_assignment(cost, 0.7f, &matches, &mismatch_row, &mismatch_col); + + MatchIterator miter; + TrajectoryPtrPool activated_trajectories; + TrajectoryPtrPool retrieved_trajectories; + + for (miter = matches.begin(); miter != matches.end(); miter++) { + Trajectory *pt = trajectory_pool[miter->first]; + Trajectory &ct = candidates[miter->second]; + if (pt->state == Tracked) { + pt->update(&ct, timestamp); + activated_trajectories.push_back(pt); + } else { + pt->reactivate(&ct, count, timestamp); + retrieved_trajectories.push_back(pt); + } + } + + TrajectoryPtrPool next_candidates(mismatch_col.size()); + for (size_t i = 0; i < mismatch_col.size(); ++i) + next_candidates[i] = &candidates[mismatch_col[i]]; + + TrajectoryPtrPool next_trajectory_pool; + for (size_t i = 0; i < mismatch_row.size(); ++i) { + int j = mismatch_row[i]; + if (trajectory_pool[j]->state == Tracked) + next_trajectory_pool.push_back(trajectory_pool[j]); + } + + cost = iou_distance(next_trajectory_pool, next_candidates); + linear_assignment(cost, 0.5f, &matches, &mismatch_row, &mismatch_col); + + for (miter = matches.begin(); miter != matches.end(); miter++) { + Trajectory *pt = next_trajectory_pool[miter->first]; + Trajectory *ct = next_candidates[miter->second]; + if (pt->state == Tracked) { + pt->update(ct, timestamp); + activated_trajectories.push_back(pt); + } else { + pt->reactivate(ct, count, timestamp); + retrieved_trajectories.push_back(pt); + } + } + + TrajectoryPtrPool lost_trajectories; + for (size_t i = 0; i < mismatch_row.size(); ++i) { + Trajectory *pt = next_trajectory_pool[mismatch_row[i]]; + if (pt->state != Lost) { + pt->mark_lost(); + 
lost_trajectories.push_back(pt); + } + } + + TrajectoryPtrPool nnext_candidates(mismatch_col.size()); + for (size_t i = 0; i < mismatch_col.size(); ++i) + nnext_candidates[i] = next_candidates[mismatch_col[i]]; + cost = iou_distance(unconfirmed_trajectories, nnext_candidates); + linear_assignment(cost, 0.7f, &matches, &mismatch_row, &mismatch_col); + + for (miter = matches.begin(); miter != matches.end(); miter++) { + unconfirmed_trajectories[miter->first]->update( + nnext_candidates[miter->second], timestamp); + activated_trajectories.push_back(unconfirmed_trajectories[miter->first]); + } + + TrajectoryPtrPool removed_trajectories; + + for (size_t i = 0; i < mismatch_row.size(); ++i) { + unconfirmed_trajectories[mismatch_row[i]]->mark_removed(); + removed_trajectories.push_back(unconfirmed_trajectories[mismatch_row[i]]); + } + + for (size_t i = 0; i < mismatch_col.size(); ++i) { + if (nnext_candidates[mismatch_col[i]]->score < det_thresh) + continue; + nnext_candidates[mismatch_col[i]]->activate(count, timestamp); + activated_trajectories.push_back(nnext_candidates[mismatch_col[i]]); + } + + for (size_t i = 0; i < this->lost_trajectories.size(); ++i) { + Trajectory < = this->lost_trajectories[i]; + if (timestamp - lt.timestamp > max_lost_time) { + lt.mark_removed(); + removed_trajectories.push_back(<); + } + } + + TrajectoryPoolIterator piter; + for (piter = this->tracked_trajectories.begin(); + piter != this->tracked_trajectories.end();) { + if (piter->state != Tracked) + piter = this->tracked_trajectories.erase(piter); + else + ++piter; + } + + this->tracked_trajectories += activated_trajectories; + this->tracked_trajectories += retrieved_trajectories; + + this->lost_trajectories -= this->tracked_trajectories; + this->lost_trajectories += lost_trajectories; + this->lost_trajectories -= this->removed_trajectories; + this->removed_trajectories += removed_trajectories; + remove_duplicate_trajectory(&this->tracked_trajectories, + &this->lost_trajectories); + + tracks->clear(); + for (size_t i = 0; i < this->tracked_trajectories.size(); ++i) { + if (this->tracked_trajectories[i].is_activated) { + Track track = {this->tracked_trajectories[i].id, + this->tracked_trajectories[i].score, + this->tracked_trajectories[i].ltrb}; + tracks->push_back(track); + } + } + return 0; +} + +cv::Mat JDETracker::motion_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b) { + if (0 == a.size() || 0 == b.size()) + return cv::Mat(a.size(), b.size(), CV_32F); + + cv::Mat edists = embedding_distance(a, b); + cv::Mat mdists = mahalanobis_distance(a, b); + cv::Mat fdists = lambda * edists + (1 - lambda) * mdists; + + const float gate_thresh = chi2inv95[4]; + for (int i = 0; i < fdists.rows; ++i) { + for (int j = 0; j < fdists.cols; ++j) { + if (*mdists.ptr(i, j) > gate_thresh) + *fdists.ptr(i, j) = FLT_MAX; + } + } + + return fdists; +} + +void JDETracker::linear_assignment(const cv::Mat &cost, float cost_limit, + Match *matches, + std::vector *mismatch_row, + std::vector *mismatch_col) { + matches->clear(); + mismatch_row->clear(); + mismatch_col->clear(); + if (cost.empty()) { + for (int i = 0; i < cost.rows; ++i) + mismatch_row->push_back(i); + for (int i = 0; i < cost.cols; ++i) + mismatch_col->push_back(i); + return; + } + + float opt = 0; + cv::Mat x(cost.rows, 1, CV_32S); + cv::Mat y(cost.cols, 1, CV_32S); + + lapjv_internal(cost, true, cost_limit, reinterpret_cast(x.data), + reinterpret_cast(y.data)); + + for (int i = 0; i < x.rows; ++i) { + int j = *x.ptr(i); + if (j >= 0) + matches->insert({i, 
j}); + else + mismatch_row->push_back(i); + } + + for (int i = 0; i < y.rows; ++i) { + int j = *y.ptr(i); + if (j < 0) + mismatch_col->push_back(i); + } + + return; +} + +void JDETracker::remove_duplicate_trajectory(TrajectoryPool *a, + TrajectoryPool *b, + float iou_thresh) { + if (a->size() == 0 || b->size() == 0) + return; + + cv::Mat dist = iou_distance(*a, *b); + cv::Mat mask = dist < iou_thresh; + std::vector idx; + cv::findNonZero(mask, idx); + + std::vector da; + std::vector db; + for (size_t i = 0; i < idx.size(); ++i) { + int ta = (*a)[idx[i].y].timestamp - (*a)[idx[i].y].starttime; + int tb = (*b)[idx[i].x].timestamp - (*b)[idx[i].x].starttime; + if (ta > tb) + db.push_back(idx[i].x); + else + da.push_back(idx[i].y); + } + + int id = 0; + TrajectoryPoolIterator piter; + for (piter = a->begin(); piter != a->end();) { + std::vector::iterator iter = find(da.begin(), da.end(), id++); + if (iter != da.end()) + piter = a->erase(piter); + else + ++piter; + } + + id = 0; + for (piter = b->begin(); piter != b->end();) { + std::vector::iterator iter = find(db.begin(), db.end(), id++); + if (iter != db.end()) + piter = b->erase(piter); + else + ++piter; + } +} + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.h b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.h new file mode 100755 index 0000000000..12337e465f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/tracker.h @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
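+
+// Rough usage sketch (variable names are illustrative only):
+//
+//   JDETracker tracker;
+//   cv::Mat dets;                 // N x 6 CV_32FC1, one detection per row
+//   cv::Mat emb;                  // N x D CV_32FC1, one embedding per row
+//   std::vector<Track> tracks;
+//   tracker.update(dets, emb, &tracks);
+//   for (const auto &t : tracks) { /* use t.id, t.score, t.ltrb */ }
+//
+// update() reads the confidence from column 1 and the (left, top, right,
+// bottom) box from columns 2..5 of each detection row; column 0 is presumably
+// the class id, following the PaddleDetection output layout.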
+ +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/jdetracker.h +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#pragma once + +#include +#include + +#include "ultrainfer/ultrainfer_model.h" +#include "ultrainfer/vision/tracking/pptracking/trajectory.h" +#include +#include +#include + +namespace ultrainfer { +namespace vision { +namespace tracking { + +typedef std::map Match; +typedef std::map::iterator MatchIterator; + +struct Track { + int id; + float score; + cv::Vec4f ltrb; +}; + +class ULTRAINFER_DECL JDETracker { +public: + JDETracker(); + + virtual bool update(const cv::Mat &dets, const cv::Mat &emb, + std::vector *tracks); + virtual ~JDETracker() {} + +private: + cv::Mat motion_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b); + void linear_assignment(const cv::Mat &cost, float cost_limit, Match *matches, + std::vector *mismatch_row, + std::vector *mismatch_col); + void remove_duplicate_trajectory(TrajectoryPool *a, TrajectoryPool *b, + float iou_thresh = 0.15f); + +private: + int timestamp; + TrajectoryPool tracked_trajectories; + TrajectoryPool lost_trajectories; + TrajectoryPool removed_trajectories; + int max_lost_time; + float lambda; + float det_thresh; + int count = 0; +}; + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.cc b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.cc new file mode 100755 index 0000000000..63dcf69ce6 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.cc @@ -0,0 +1,529 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
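+
+// Note on TKalmanFilter (defined in trajectory.h): it is an 8-state constant
+// velocity filter whose measurement is the xyah box, i.e. (center_x,
+// center_y, aspect_ratio, height); the remaining four states are the
+// corresponding velocities. The noise terms below are scaled by the box
+// height in the DeepSORT-style fashion: e.g. with std_weight_position =
+// 1 / 20 and a box height of 160 px, the positional standard deviation is
+// 160 / 20 = 8 px, hence a variance of 64.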
+ +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/trajectory.cpp +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#include "ultrainfer/vision/tracking/pptracking/trajectory.h" +#include + +namespace ultrainfer { +namespace vision { +namespace tracking { + +void TKalmanFilter::init(const cv::Mat &measurement) { + measurement.copyTo(statePost(cv::Rect(0, 0, 1, 4))); + statePost(cv::Rect(0, 4, 1, 4)).setTo(0); + statePost.copyTo(statePre); + + float varpos = 2 * std_weight_position * (*measurement.ptr(3)); + varpos *= varpos; + float varvel = 10 * std_weight_velocity * (*measurement.ptr(3)); + varvel *= varvel; + + errorCovPost.setTo(0); + *errorCovPost.ptr(0, 0) = varpos; + *errorCovPost.ptr(1, 1) = varpos; + *errorCovPost.ptr(2, 2) = 1e-4f; + *errorCovPost.ptr(3, 3) = varpos; + *errorCovPost.ptr(4, 4) = varvel; + *errorCovPost.ptr(5, 5) = varvel; + *errorCovPost.ptr(6, 6) = 1e-10f; + *errorCovPost.ptr(7, 7) = varvel; + errorCovPost.copyTo(errorCovPre); +} + +const cv::Mat &TKalmanFilter::predict() { + float varpos = std_weight_position * (*statePre.ptr(3)); + varpos *= varpos; + float varvel = std_weight_velocity * (*statePre.ptr(3)); + varvel *= varvel; + + processNoiseCov.setTo(0); + *processNoiseCov.ptr(0, 0) = varpos; + *processNoiseCov.ptr(1, 1) = varpos; + *processNoiseCov.ptr(2, 2) = 1e-4f; + *processNoiseCov.ptr(3, 3) = varpos; + *processNoiseCov.ptr(4, 4) = varvel; + *processNoiseCov.ptr(5, 5) = varvel; + *processNoiseCov.ptr(6, 6) = 1e-10f; + *processNoiseCov.ptr(7, 7) = varvel; + + return cv::KalmanFilter::predict(); +} + +const cv::Mat &TKalmanFilter::correct(const cv::Mat &measurement) { + float varpos = std_weight_position * (*measurement.ptr(3)); + varpos *= varpos; + + measurementNoiseCov.setTo(0); + *measurementNoiseCov.ptr(0, 0) = varpos; + *measurementNoiseCov.ptr(1, 1) = varpos; + *measurementNoiseCov.ptr(2, 2) = 1e-2f; + *measurementNoiseCov.ptr(3, 3) = varpos; + + return cv::KalmanFilter::correct(measurement); +} + +void TKalmanFilter::project(cv::Mat *mean, cv::Mat *covariance) const { + float varpos = std_weight_position * (*statePost.ptr(3)); + varpos *= varpos; + + cv::Mat measurementNoiseCov_ = cv::Mat::eye(4, 4, CV_32F); + *measurementNoiseCov_.ptr(0, 0) = varpos; + *measurementNoiseCov_.ptr(1, 1) = varpos; + *measurementNoiseCov_.ptr(2, 2) = 1e-2f; + *measurementNoiseCov_.ptr(3, 3) = varpos; + + *mean = measurementMatrix * statePost; + cv::Mat temp = measurementMatrix * errorCovPost; + gemm(temp, measurementMatrix, 1, measurementNoiseCov_, 1, *covariance, + cv::GEMM_2_T); +} + +const cv::Mat &Trajectory::predict(void) { + if (state != Tracked) + *cv::KalmanFilter::statePost.ptr(7) = 0; + return TKalmanFilter::predict(); +} + +void Trajectory::update(Trajectory *traj, int timestamp_, + bool update_embedding_) { + timestamp = timestamp_; + ++length; + ltrb = traj->ltrb; + xyah = traj->xyah; + TKalmanFilter::correct(cv::Mat(traj->xyah)); + state = Tracked; + is_activated = true; + score = traj->score; + if (update_embedding_) + update_embedding(traj->current_embedding); +} + +void Trajectory::activate(int &cnt, int timestamp_) { + id = next_id(cnt); + TKalmanFilter::init(cv::Mat(xyah)); + length = 0; + state = Tracked; + if (timestamp_ == 1) { + is_activated = true; + } + timestamp = timestamp_; + starttime = timestamp_; +} + +void Trajectory::reactivate(Trajectory *traj, int &cnt, int timestamp_, + bool newid) { + TKalmanFilter::correct(cv::Mat(traj->xyah)); + 
update_embedding(traj->current_embedding); + length = 0; + state = Tracked; + is_activated = true; + timestamp = timestamp_; + if (newid) + id = next_id(cnt); +} + +void Trajectory::update_embedding(const cv::Mat &embedding) { + current_embedding = embedding / cv::norm(embedding); + if (smooth_embedding.empty()) { + smooth_embedding = current_embedding; + } else { + smooth_embedding = eta * smooth_embedding + (1 - eta) * current_embedding; + } + smooth_embedding = smooth_embedding / cv::norm(smooth_embedding); +} + +TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPool &b) { + TrajectoryPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) + ids[i] = a[i].id; + + for (size_t i = 0; i < b.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i].id); + if (iter == ids.end()) { + sum.push_back(b[i]); + ids.push_back(b[i].id); + } + } + + return sum; +} + +TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPtrPool &b) { + TrajectoryPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) + ids[i] = a[i].id; + + for (size_t i = 0; i < b.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i]->id); + if (iter == ids.end()) { + sum.push_back(*b[i]); + ids.push_back(b[i]->id); + } + } + + return sum; +} + +TrajectoryPool &operator+=(TrajectoryPool &a, // NOLINT + const TrajectoryPtrPool &b) { + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) + ids[i] = a[i].id; + + for (size_t i = 0; i < b.size(); ++i) { + if (b[i]->smooth_embedding.empty()) + continue; + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i]->id); + if (iter == ids.end()) { + a.push_back(*b[i]); + ids.push_back(b[i]->id); + } + } + + return a; +} + +TrajectoryPool operator-(const TrajectoryPool &a, const TrajectoryPool &b) { + TrajectoryPool dif; + std::vector ids(b.size()); + for (size_t i = 0; i < b.size(); ++i) + ids[i] = b[i].id; + + for (size_t i = 0; i < a.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), a[i].id); + if (iter == ids.end()) + dif.push_back(a[i]); + } + + return dif; +} + +TrajectoryPool &operator-=(TrajectoryPool &a, // NOLINT + const TrajectoryPool &b) { + std::vector ids(b.size()); + for (size_t i = 0; i < b.size(); ++i) + ids[i] = b[i].id; + + TrajectoryPoolIterator piter; + for (piter = a.begin(); piter != a.end();) { + std::vector::iterator iter = find(ids.begin(), ids.end(), piter->id); + if (iter == ids.end()) + ++piter; + else + piter = a.erase(piter); + } + + return a; +} + +TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + TrajectoryPtrPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) + ids[i] = a[i]->id; + + for (size_t i = 0; i < b.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i]->id); + if (iter == ids.end()) { + sum.push_back(b[i]); + ids.push_back(b[i]->id); + } + } + + return sum; +} + +TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, TrajectoryPool *b) { + TrajectoryPtrPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) + ids[i] = a[i]->id; + + for (size_t i = 0; i < b->size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), (*b)[i].id); + if (iter == ids.end()) { + 
sum.push_back(&(*b)[i]); + ids.push_back((*b)[i].id); + } + } + + return sum; +} + +TrajectoryPtrPool operator-(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + TrajectoryPtrPool dif; + std::vector ids(b.size()); + for (size_t i = 0; i < b.size(); ++i) + ids[i] = b[i]->id; + + for (size_t i = 0; i < a.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), a[i]->id); + if (iter == ids.end()) + dif.push_back(a[i]); + } + + return dif; +} + +cv::Mat embedding_distance(const TrajectoryPool &a, const TrajectoryPool &b) { + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + cv::Mat u = a[i].smooth_embedding; + cv::Mat v = b[j].smooth_embedding; + double uv = u.dot(v); + double uu = u.dot(u); + double vv = v.dot(v); + double dist = std::abs(1. - uv / std::sqrt(uu * vv)); + // double dist = cv::norm(a[i].smooth_embedding, b[j].smooth_embedding, + // cv::NORM_L2); + distsi[j] = static_cast(std::max(std::min(dist, 2.), 0.)); + } + } + return dists; +} + +cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + // double dist = cv::norm(a[i]->smooth_embedding, b[j]->smooth_embedding, + // cv::NORM_L2); + // distsi[j] = static_cast(dist); + cv::Mat u = a[i]->smooth_embedding; + cv::Mat v = b[j]->smooth_embedding; + double uv = u.dot(v); + double uu = u.dot(u); + double vv = v.dot(v); + double dist = std::abs(1. - uv / std::sqrt(uu * vv)); + distsi[j] = static_cast(std::max(std::min(dist, 2.), 0.)); + } + } + + return dists; +} + +cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b) { + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + // double dist = cv::norm(a[i]->smooth_embedding, b[j].smooth_embedding, + // cv::NORM_L2); + // distsi[j] = static_cast(dist); + cv::Mat u = a[i]->smooth_embedding; + cv::Mat v = b[j].smooth_embedding; + double uv = u.dot(v); + double uu = u.dot(u); + double vv = v.dot(v); + double dist = std::abs(1. 
- uv / std::sqrt(uu * vv)); + distsi[j] = static_cast(std::max(std::min(dist, 2.), 0.)); + } + } + + return dists; +} + +cv::Mat mahalanobis_distance(const TrajectoryPool &a, const TrajectoryPool &b) { + std::vector means(a.size()); + std::vector icovariances(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + cv::Mat covariance; + a[i].project(&means[i], &covariance); + cv::invert(covariance, icovariances[i]); + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Mat x(b[j].xyah); + float dist = + static_cast(cv::Mahalanobis(x, means[i], icovariances[i])); + distsi[j] = dist * dist; + } + } + + return dists; +} + +cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + std::vector means(a.size()); + std::vector icovariances(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + cv::Mat covariance; + a[i]->project(&means[i], &covariance); + cv::invert(covariance, icovariances[i]); + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Mat x(b[j]->xyah); + float dist = + static_cast(cv::Mahalanobis(x, means[i], icovariances[i])); + distsi[j] = dist * dist; + } + } + + return dists; +} + +cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b) { + std::vector means(a.size()); + std::vector icovariances(a.size()); + + for (size_t i = 0; i < a.size(); ++i) { + cv::Mat covariance; + a[i]->project(&means[i], &covariance); + cv::invert(covariance, icovariances[i]); + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Mat x(b[j].xyah); + float dist = + static_cast(cv::Mahalanobis(x, means[i], icovariances[i])); + distsi[j] = dist * dist; + } + } + + return dists; +} + +static inline float calc_inter_area(const cv::Vec4f &a, const cv::Vec4f &b) { + if (a[2] < b[0] || a[0] > b[2] || a[3] < b[1] || a[1] > b[3]) + return 0.f; + + float w = std::min(a[2], b[2]) - std::max(a[0], b[0]); + float h = std::min(a[3], b[3]) - std::max(a[1], b[1]); + return w * h; +} + +cv::Mat iou_distance(const TrajectoryPool &a, const TrajectoryPool &b) { + std::vector areaa(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + float w = a[i].ltrb[2] - a[i].ltrb[0]; + float h = a[i].ltrb[3] - a[i].ltrb[1]; + areaa[i] = w * h; + } + + std::vector areab(b.size()); + for (size_t j = 0; j < b.size(); ++j) { + float w = b[j].ltrb[2] - b[j].ltrb[0]; + float h = b[j].ltrb[3] - b[j].ltrb[1]; + areab[j] = w * h; + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + const cv::Vec4f &boxa = a[i].ltrb; + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Vec4f &boxb = b[j].ltrb; + float inters = calc_inter_area(boxa, boxb); + distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters); + } + } + + return dists; +} + +cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPtrPool &b) { + std::vector areaa(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + float w = a[i]->ltrb[2] - a[i]->ltrb[0]; + float h = a[i]->ltrb[3] - a[i]->ltrb[1]; + areaa[i] = w * h; + } + + std::vector areab(b.size()); + for (size_t j = 0; j < b.size(); ++j) { + float w = b[j]->ltrb[2] - b[j]->ltrb[0]; + float h = b[j]->ltrb[3] - b[j]->ltrb[1]; 
+ areab[j] = w * h; + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + const cv::Vec4f &boxa = a[i]->ltrb; + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Vec4f &boxb = b[j]->ltrb; + float inters = calc_inter_area(boxa, boxb); + distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters); + } + } + + return dists; +} + +cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b) { + std::vector areaa(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + float w = a[i]->ltrb[2] - a[i]->ltrb[0]; + float h = a[i]->ltrb[3] - a[i]->ltrb[1]; + areaa[i] = w * h; + } + + std::vector areab(b.size()); + for (size_t j = 0; j < b.size(); ++j) { + float w = b[j].ltrb[2] - b[j].ltrb[0]; + float h = b[j].ltrb[3] - b[j].ltrb[1]; + areab[j] = w * h; + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + const cv::Vec4f &boxa = a[i]->ltrb; + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Vec4f &boxb = b[j].ltrb; + float inters = calc_inter_area(boxa, boxb); + distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters); + } + } + + return dists; +} + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.h b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.h new file mode 100755 index 0000000000..d5df581c5d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/pptracking/trajectory.h @@ -0,0 +1,213 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
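+
+// Two conventions used throughout this header and trajectory.cc:
+//   * Boxes are kept both as ltrb = (left, top, right, bottom) and as the
+//     Kalman measurement xyah = (center_x, center_y, width / height, height).
+//     For example, ltrb = (0, 0, 50, 100) maps to xyah = (25, 50, 0.5, 100)
+//     via ltrb2xyah() below.
+//   * Appearance embeddings are L2-normalized and smoothed with an
+//     exponential moving average, smooth = eta * smooth + (1 - eta) * current
+//     with eta = 0.9, then re-normalized (see Trajectory::update_embedding).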
+ +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/trajectory.h +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#pragma once + +#include "opencv2/video/tracking.hpp" +#include "ultrainfer/ultrainfer_model.h" +#include +#include +#include +#include + +namespace ultrainfer { +namespace vision { +namespace tracking { + +typedef enum { New = 0, Tracked = 1, Lost = 2, Removed = 3 } TrajectoryState; + +class Trajectory; +typedef std::vector TrajectoryPool; +typedef std::vector::iterator TrajectoryPoolIterator; +typedef std::vector TrajectoryPtrPool; +typedef std::vector::iterator TrajectoryPtrPoolIterator; + +class ULTRAINFER_DECL TKalmanFilter : public cv::KalmanFilter { +public: + TKalmanFilter(void); + virtual ~TKalmanFilter(void) {} + virtual void init(const cv::Mat &measurement); + virtual const cv::Mat &predict(); + virtual const cv::Mat &correct(const cv::Mat &measurement); + virtual void project(cv::Mat *mean, cv::Mat *covariance) const; + +private: + float std_weight_position; + float std_weight_velocity; +}; + +inline TKalmanFilter::TKalmanFilter(void) : cv::KalmanFilter(8, 4) { + cv::KalmanFilter::transitionMatrix = cv::Mat::eye(8, 8, CV_32F); + for (int i = 0; i < 4; ++i) + cv::KalmanFilter::transitionMatrix.at(i, i + 4) = 1; + cv::KalmanFilter::measurementMatrix = cv::Mat::eye(4, 8, CV_32F); + std_weight_position = 1 / 20.f; + std_weight_velocity = 1 / 160.f; +} + +class ULTRAINFER_DECL Trajectory : public TKalmanFilter { +public: + Trajectory(); + Trajectory(const cv::Vec4f <rb, float score, const cv::Mat &embedding); + Trajectory(const Trajectory &other); + Trajectory &operator=(const Trajectory &rhs); + virtual ~Trajectory(void) {} + + int next_id(int &nt); + virtual const cv::Mat &predict(void); + virtual void update(Trajectory *traj, int timestamp, + bool update_embedding = true); + virtual void activate(int &cnt, int timestamp); + virtual void reactivate(Trajectory *traj, int &cnt, int timestamp, + bool newid = false); + virtual void mark_lost(void); + virtual void mark_removed(void); + + friend TrajectoryPool operator+(const TrajectoryPool &a, + const TrajectoryPool &b); + friend TrajectoryPool operator+(const TrajectoryPool &a, + const TrajectoryPtrPool &b); + friend TrajectoryPool &operator+=(TrajectoryPool &a, // NOLINT + const TrajectoryPtrPool &b); + friend TrajectoryPool operator-(const TrajectoryPool &a, + const TrajectoryPool &b); + friend TrajectoryPool &operator-=(TrajectoryPool &a, // NOLINT + const TrajectoryPool &b); + friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, + TrajectoryPool *b); + friend TrajectoryPtrPool operator-(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + + friend cv::Mat embedding_distance(const TrajectoryPool &a, + const TrajectoryPool &b); + friend cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + friend cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b); + + friend cv::Mat mahalanobis_distance(const TrajectoryPool &a, + const TrajectoryPool &b); + friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b); + + friend cv::Mat iou_distance(const TrajectoryPool &a, const TrajectoryPool &b); + friend cv::Mat iou_distance(const TrajectoryPtrPool &a, + const 
TrajectoryPtrPool &b); + friend cv::Mat iou_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b); + +private: + void update_embedding(const cv::Mat &embedding); + +public: + TrajectoryState state; + cv::Vec4f ltrb; + cv::Mat smooth_embedding; + int id; + bool is_activated; + int timestamp; + int starttime; + float score; + +private: + // int count=0; + cv::Vec4f xyah; + cv::Mat current_embedding; + float eta; + int length; +}; + +inline cv::Vec4f ltrb2xyah(const cv::Vec4f <rb) { + cv::Vec4f xyah; + xyah[0] = (ltrb[0] + ltrb[2]) * 0.5f; + xyah[1] = (ltrb[1] + ltrb[3]) * 0.5f; + xyah[3] = ltrb[3] - ltrb[1]; + xyah[2] = (ltrb[2] - ltrb[0]) / xyah[3]; + return xyah; +} + +inline Trajectory::Trajectory() + : state(New), ltrb(cv::Vec4f()), smooth_embedding(cv::Mat()), id(0), + is_activated(false), timestamp(0), starttime(0), score(0), eta(0.9), + length(0) {} + +inline Trajectory::Trajectory(const cv::Vec4f <rb_, float score_, + const cv::Mat &embedding) + : state(New), ltrb(ltrb_), smooth_embedding(cv::Mat()), id(0), + is_activated(false), timestamp(0), starttime(0), score(score_), eta(0.9), + length(0) { + xyah = ltrb2xyah(ltrb); + update_embedding(embedding); +} + +inline Trajectory::Trajectory(const Trajectory &other) + : state(other.state), ltrb(other.ltrb), id(other.id), + is_activated(other.is_activated), timestamp(other.timestamp), + starttime(other.starttime), xyah(other.xyah), score(other.score), + eta(other.eta), length(other.length) { + other.smooth_embedding.copyTo(smooth_embedding); + other.current_embedding.copyTo(current_embedding); + // copy state in KalmanFilter + + other.statePre.copyTo(cv::KalmanFilter::statePre); + other.statePost.copyTo(cv::KalmanFilter::statePost); + other.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre); + other.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost); +} + +inline Trajectory &Trajectory::operator=(const Trajectory &rhs) { + this->state = rhs.state; + this->ltrb = rhs.ltrb; + rhs.smooth_embedding.copyTo(this->smooth_embedding); + this->id = rhs.id; + this->is_activated = rhs.is_activated; + this->timestamp = rhs.timestamp; + this->starttime = rhs.starttime; + this->xyah = rhs.xyah; + this->score = rhs.score; + rhs.current_embedding.copyTo(this->current_embedding); + this->eta = rhs.eta; + this->length = rhs.length; + + // copy state in KalmanFilter + + rhs.statePre.copyTo(cv::KalmanFilter::statePre); + rhs.statePost.copyTo(cv::KalmanFilter::statePost); + rhs.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre); + rhs.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost); + + return *this; +} + +inline int Trajectory::next_id(int &cnt) { + ++cnt; + return cnt; +} + +inline void Trajectory::mark_lost(void) { state = Lost; } + +inline void Trajectory::mark_removed(void) { state = Removed; } + +} // namespace tracking +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/tracking/tracking_pybind.cc b/libs/ultrainfer/ultrainfer/vision/tracking/tracking_pybind.cc new file mode 100755 index 0000000000..6d3565c8c8 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/tracking/tracking_pybind.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindPPTracking(pybind11::module &m); + +void BindTracking(pybind11::module &m) { + auto tracking_module = m.def_submodule("tracking", "object tracking models."); + BindPPTracking(tracking_module); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/cosine_similarity.cc b/libs/ultrainfer/ultrainfer/vision/utils/cosine_similarity.cc new file mode 100755 index 0000000000..c0a3f34e56 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/cosine_similarity.cc @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +float CosineSimilarity(const std::vector &a, const std::vector &b, + bool normalized) { + FDASSERT((a.size() == b.size()) && (a.size() != 0), + "The size of a and b must be equal and >= 1."); + size_t num_val = a.size(); + if (normalized) { + float mul_a = 0.f, mul_b = 0.f, mul_ab = 0.f; + for (size_t i = 0; i < num_val; ++i) { + mul_a += (a[i] * a[i]); + mul_b += (b[i] * b[i]); + mul_ab += (a[i] * b[i]); + } + return (mul_ab / (std::sqrt(mul_a) * std::sqrt(mul_b))); + } + auto norm_a = L2Normalize(a); + auto norm_b = L2Normalize(b); + float mul_a = 0.f, mul_b = 0.f, mul_ab = 0.f; + for (size_t i = 0; i < num_val; ++i) { + mul_a += (norm_a[i] * norm_a[i]); + mul_b += (norm_b[i] * norm_b[i]); + mul_ab += (norm_a[i] * norm_b[i]); + } + return (mul_ab / (std::sqrt(mul_a) * std::sqrt(mul_b))); +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/crop_image.cc b/libs/ultrainfer/ultrainfer/vision/utils/crop_image.cc new file mode 100755 index 0000000000..8da51f87f4 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/crop_image.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
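+
+// CropImageByBox expands the detection box by `expandratio` on each side,
+// widens it when necessary so the crop keeps roughly a 3:4 width:height
+// ratio (half_w is raised to 0.75 * half_h whenever half_h * 3 > half_w * 4),
+// clamps the crop to the image, and returns the crop plus its center and
+// scale, which downstream keypoint models can use to map predictions back to
+// the original image. Worked example with box = (10, 10, 110, 210) and
+// expandratio = 0.3: half_w = 100 * 1.3 / 2 = 65, half_h = 200 * 1.3 / 2 =
+// 130; since 130 * 3 > 65 * 4, half_w becomes 130 * 0.75 = 97.5.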
+ +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +bool CropImageByBox(Mat &src_im, Mat *dst_im, const std::vector &box, + std::vector *center, std::vector *scale, + const float expandratio) { + const cv::Mat *img = src_im.GetOpenCVMat(); + cv::Mat *crop_img = dst_im->GetOpenCVMat(); + int xmin = static_cast(box[0]); + int ymin = static_cast(box[1]); + int xmax = static_cast(box[2]); + int ymax = static_cast(box[3]); + float centerx = (xmin + xmax) / 2.0f; + float centery = (ymin + ymax) / 2.0f; + float half_h = (ymax - ymin) * (1 + expandratio) / 2.0f; + float half_w = (xmax - xmin) * (1 + expandratio) / 2.0f; + // adjust h or w to keep image ratio, expand the shorter edge + if (half_h * 3 > half_w * 4) { + half_w = half_h * 0.75; + } + int crop_xmin = std::max(0, static_cast(centerx - half_w)); + int crop_ymin = std::max(0, static_cast(centery - half_h)); + int crop_xmax = std::min(img->cols - 1, static_cast(centerx + half_w)); + int crop_ymax = std::min(img->rows - 1, static_cast(centery + half_h)); + + crop_img->create(crop_ymax - crop_ymin, crop_xmax - crop_xmin, img->type()); + *crop_img = + (*img)(cv::Range(crop_ymin, crop_ymax), cv::Range(crop_xmin, crop_xmax)); + center->clear(); + center->emplace_back((crop_xmin + crop_xmax) / 2.0f); + center->emplace_back((crop_ymin + crop_ymax) / 2.0f); + + scale->clear(); + scale->emplace_back((crop_xmax - crop_xmin)); + scale->emplace_back((crop_ymax - crop_ymin)); + + dst_im->SetWidth(crop_img->cols); + dst_im->SetHeight(crop_img->rows); + return true; +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/cuda_utils.h b/libs/ultrainfer/ultrainfer/vision/utils/cuda_utils.h new file mode 100755 index 0000000000..17c9bbe04d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/cuda_utils.h @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +#ifndef CUDA_CHECK +#define CUDA_CHECK(callstr) \ + { \ + cudaError_t error_code = callstr; \ + if (error_code != cudaSuccess) { \ + std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":"; \ + std::cerr << __LINE__; \ + assert(0); \ + } \ + } +#endif // CUDA_CHECK + +namespace ultrainfer { +namespace vision { +namespace utils { +void CudaYoloPreprocess(uint8_t *src, int src_width, int src_height, float *dst, + int dst_width, int dst_height, + const std::vector padding_value, + cudaStream_t stream); +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/dark_parse.cc b/libs/ultrainfer/ultrainfer/vision/utils/dark_parse.cc new file mode 100755 index 0000000000..02e853146d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/dark_parse.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +void DarkParse(const std::vector &heatmap, const std::vector &dim, + std::vector *coords, const int px, const int py, + const int index, const int ch) { + /*DARK postpocessing, Zhang et al. Distribution-Aware Coordinate + Representation for Human Pose Estimation (CVPR 2020). + 1) offset = - hassian.inv() * derivative + 2) dx = (heatmap[x+1] - heatmap[x-1])/2. + 3) dxx = (dx[x+1] - dx[x-1])/2. + 4) derivative = Mat([dx, dy]) + 5) hassian = Mat([[dxx, dxy], [dxy, dyy]]) + */ + std::vector::const_iterator first1 = heatmap.begin() + index; + std::vector::const_iterator last1 = + heatmap.begin() + index + dim[2] * dim[3]; + std::vector heatmap_ch(first1, last1); + cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]); + heatmap_mat.convertTo(heatmap_mat, CV_32FC1); + cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0); + heatmap_mat = heatmap_mat.reshape(1, 1); + heatmap_ch = std::vector(heatmap_mat.reshape(1, 1)); + + float epsilon = 1e-10; + // sample heatmap to get values in around target location + float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon)); + float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon)); + float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon)); + + float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon)); + float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon)); + float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon)); + float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon)); + float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon)); + float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon)); + float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon)); + float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon)); + float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon)); + float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon)); + + // compute dx/dy and dxx/dyy with sampled values + float dx = 0.5 * (xr - xl); + float dy = 0.5 * (yu - yd); + float dxx = 0.25 * (xr2 - 2 * xy + xl2); + float dxy = 0.25 * (xryu - xryd - xlyu + xlyd); + float dyy = 0.25 * (yu2 - 2 * xy + yd2); + + // finally get offset by derivative and hassian, which combined by dx/dy and + // dxx/dyy + if (dxx * dyy - dxy * dxy != 0) { + float M[2][2] = {dxx, dxy, dxy, dyy}; + float D[2] = {dx, dy}; + cv::Mat hassian(2, 2, CV_32F, M); + cv::Mat derivative(2, 1, CV_32F, D); + cv::Mat offset = -hassian.inv() * derivative; + (*coords)[ch * 2] += offset.at(0, 0); + (*coords)[ch * 2 + 1] += offset.at(1, 0); + } +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/face_align.cc b/libs/ultrainfer/ultrainfer/vision/utils/face_align.cc new file mode 100755 index 0000000000..90114aeaf6 --- /dev/null +++ 
b/libs/ultrainfer/ultrainfer/vision/utils/face_align.cc @@ -0,0 +1,152 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// reference: +// https://github.com/deepinsight/insightface/blob/master/recognition/_tools_/cpp_align/face_align.h +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +cv::Mat MeanAxis0(const cv::Mat &src) { + int num = src.rows; + int dim = src.cols; + cv::Mat output(1, dim, CV_32F); + for (int i = 0; i < dim; i++) { + float sum = 0; + for (int j = 0; j < num; j++) { + sum += src.at(j, i); + } + output.at(0, i) = sum / num; + } + return output; +} + +cv::Mat ElementwiseMinus(const cv::Mat &A, const cv::Mat &B) { + cv::Mat output(A.rows, A.cols, A.type()); + assert(B.cols == A.cols); + if (B.cols == A.cols) { + for (int i = 0; i < A.rows; i++) { + for (int j = 0; j < B.cols; j++) { + output.at(i, j) = A.at(i, j) - B.at(0, j); + } + } + } + return output; +} + +cv::Mat VarAxis0(const cv::Mat &src) { + cv::Mat temp_ = ElementwiseMinus(src, MeanAxis0(src)); + cv::multiply(temp_, temp_, temp_); + return MeanAxis0(temp_); +} + +int MatrixRank(cv::Mat M) { + cv::Mat w, u, vt; + cv::SVD::compute(M, w, u, vt); + cv::Mat1b non_zero_singular_values = w > 0.0001; + int rank = countNonZero(non_zero_singular_values); + return rank; +} + +cv::Mat SimilarTransform(cv::Mat &dst, cv::Mat &src) { + int num = dst.rows; + int dim = dst.cols; + cv::Mat src_mean = MeanAxis0(dst); + cv::Mat dst_mean = MeanAxis0(src); + cv::Mat src_demean = ElementwiseMinus(dst, src_mean); + cv::Mat dst_demean = ElementwiseMinus(src, dst_mean); + cv::Mat A = (dst_demean.t() * src_demean) / static_cast(num); + cv::Mat d(dim, 1, CV_32F); + d.setTo(1.0f); + if (cv::determinant(A) < 0) { + d.at(dim - 1, 0) = -1; + } + cv::Mat T = cv::Mat::eye(dim + 1, dim + 1, CV_32F); + cv::Mat U, S, V; + cv::SVD::compute(A, S, U, V); + int rank = MatrixRank(A); + if (rank == 0) { + assert(rank == 0); + } else if (rank == dim - 1) { + if (cv::determinant(U) * cv::determinant(V) > 0) { + T.rowRange(0, dim).colRange(0, dim) = U * V; + } else { + int s = d.at(dim - 1, 0) = -1; + d.at(dim - 1, 0) = -1; + + T.rowRange(0, dim).colRange(0, dim) = U * V; + cv::Mat diag_ = cv::Mat::diag(d); + cv::Mat twp = diag_ * V; // np.dot(np.diag(d), V.T) + cv::Mat B = cv::Mat::zeros(3, 3, CV_8UC1); + cv::Mat C = B.diag(0); + T.rowRange(0, dim).colRange(0, dim) = U * twp; + d.at(dim - 1, 0) = s; + } + } else { + cv::Mat diag_ = cv::Mat::diag(d); + cv::Mat twp = diag_ * V.t(); // np.dot(np.diag(d), V.T) + cv::Mat res = U * twp; // U + T.rowRange(0, dim).colRange(0, dim) = -U.t() * twp; + } + cv::Mat var_ = VarAxis0(src_demean); + float val = cv::sum(var_).val[0]; + cv::Mat res; + cv::multiply(d, S, res); + float scale = 1.0 / val * cv::sum(res).val[0]; + T.rowRange(0, dim).colRange(0, dim) = + -T.rowRange(0, dim).colRange(0, dim).t(); + cv::Mat temp1 = T.rowRange(0, dim).colRange(0, dim); // T[:dim, :dim] + cv::Mat 
temp2 = src_mean.t(); // src_mean.T + cv::Mat temp3 = temp1 * temp2; // np.dot(T[:dim, :dim], src_mean.T) + cv::Mat temp4 = scale * temp3; + T.rowRange(0, dim).colRange(dim, dim + 1) = -(temp4 - dst_mean.t()); + T.rowRange(0, dim).colRange(0, dim) *= scale; + return T; +} + +std::vector +AlignFaceWithFivePoints(cv::Mat &image, FaceDetectionResult &result, + std::vector> std_landmarks, + std::array output_size) { + FDASSERT(std_landmarks.size() == 5, "The landmarks.size() must be 5.") + FDASSERT(!image.empty(), "The input_image can't be empty.") + std::vector output_images; + output_images.reserve(result.scores.size()); + if (result.boxes.empty()) { + FDWARNING << "The result is empty." << std::endl; + return output_images; + } + + cv::Mat src(5, 2, CV_32FC1, std_landmarks.data()); + for (int i = 0; i < result.landmarks.size(); i += 5) { + cv::Mat dst(5, 2, CV_32FC1, result.landmarks.data() + i); + cv::Mat m = SimilarTransform(dst, src); + cv::Mat map_matrix; + cv::Rect map_matrix_r = cv::Rect(0, 0, 3, 2); + cv::Mat(m, map_matrix_r).copyTo(map_matrix); + cv::Mat cropped_image_aligned; + cv::warpAffine(image, cropped_image_aligned, map_matrix, + {output_size[0], output_size[1]}); + if (cropped_image_aligned.empty()) { + FDWARNING << "croppedImageAligned is empty." << std::endl; + } + output_images.emplace_back(cropped_image_aligned); + } + return output_images; +} +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/l2_normalize.cc b/libs/ultrainfer/ultrainfer/vision/utils/l2_normalize.cc new file mode 100755 index 0000000000..031e32711d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/l2_normalize.cc @@ -0,0 +1,41 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +std::vector L2Normalize(const std::vector &values) { + size_t num_val = values.size(); + if (num_val == 0) { + return {}; + } + std::vector norm; + float l2_sum_val = 0.f; + for (size_t i = 0; i < num_val; ++i) { + l2_sum_val += (values[i] * values[i]); + } + float l2_sum_sqrt = std::sqrt(l2_sum_val); + norm.resize(num_val); + for (size_t i = 0; i < num_val; ++i) { + norm[i] = values[i] / l2_sum_sqrt; + } + return norm; +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/nms.cc b/libs/ultrainfer/ultrainfer/vision/utils/nms.cc new file mode 100755 index 0000000000..73859b1636 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/nms.cc @@ -0,0 +1,142 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/utils/perf.h" +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +// The implementation refers to +// https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/cpp/src/utils.cc +void NMS(DetectionResult *result, float iou_threshold, + std::vector *index) { + // get sorted score indices + std::vector sorted_indices; + if (index != nullptr) { + std::map> score_map; + for (size_t i = 0; i < result->scores.size(); ++i) { + score_map.insert(std::pair(result->scores[i], i)); + } + for (auto iter : score_map) { + sorted_indices.push_back(iter.second); + } + } + utils::SortDetectionResult(result); + + std::vector area_of_boxes(result->boxes.size()); + std::vector suppressed(result->boxes.size(), 0); + for (size_t i = 0; i < result->boxes.size(); ++i) { + area_of_boxes[i] = (result->boxes[i][2] - result->boxes[i][0]) * + (result->boxes[i][3] - result->boxes[i][1]); + } + + for (size_t i = 0; i < result->boxes.size(); ++i) { + if (suppressed[i] == 1) { + continue; + } + for (size_t j = i + 1; j < result->boxes.size(); ++j) { + if (suppressed[j] == 1) { + continue; + } + float xmin = std::max(result->boxes[i][0], result->boxes[j][0]); + float ymin = std::max(result->boxes[i][1], result->boxes[j][1]); + float xmax = std::min(result->boxes[i][2], result->boxes[j][2]); + float ymax = std::min(result->boxes[i][3], result->boxes[j][3]); + float overlap_w = std::max(0.0f, xmax - xmin); + float overlap_h = std::max(0.0f, ymax - ymin); + float overlap_area = overlap_w * overlap_h; + float overlap_ratio = + overlap_area / (area_of_boxes[i] + area_of_boxes[j] - overlap_area); + if (overlap_ratio > iou_threshold) { + suppressed[j] = 1; + } + } + } + DetectionResult backup(*result); + result->Clear(); + result->Reserve(suppressed.size()); + for (size_t i = 0; i < suppressed.size(); ++i) { + if (suppressed[i] == 1) { + continue; + } + result->boxes.emplace_back(backup.boxes[i]); + result->scores.push_back(backup.scores[i]); + result->label_ids.push_back(backup.label_ids[i]); + if (index != nullptr) { + index->push_back(sorted_indices[i]); + } + } +} + +void NMS(FaceDetectionResult *result, float iou_threshold) { + utils::SortDetectionResult(result); + + std::vector area_of_boxes(result->boxes.size()); + std::vector suppressed(result->boxes.size(), 0); + for (size_t i = 0; i < result->boxes.size(); ++i) { + area_of_boxes[i] = (result->boxes[i][2] - result->boxes[i][0]) * + (result->boxes[i][3] - result->boxes[i][1]); + } + + for (size_t i = 0; i < result->boxes.size(); ++i) { + if (suppressed[i] == 1) { + continue; + } + for (size_t j = i + 1; j < result->boxes.size(); ++j) { + if (suppressed[j] == 1) { + continue; + } + float xmin = std::max(result->boxes[i][0], result->boxes[j][0]); + float ymin = std::max(result->boxes[i][1], result->boxes[j][1]); + float xmax = std::min(result->boxes[i][2], result->boxes[j][2]); + float ymax = std::min(result->boxes[i][3], result->boxes[j][3]); + float overlap_w = std::max(0.0f, xmax - xmin); + float overlap_h = std::max(0.0f, ymax - ymin); + float overlap_area = 
overlap_w * overlap_h; + float overlap_ratio = + overlap_area / (area_of_boxes[i] + area_of_boxes[j] - overlap_area); + if (overlap_ratio > iou_threshold) { + suppressed[j] = 1; + } + } + } + FaceDetectionResult backup(*result); + int landmarks_per_face = result->landmarks_per_face; + + result->Clear(); + // don't forget to reset the landmarks_per_face + // before apply Reserve method. + result->landmarks_per_face = landmarks_per_face; + result->Reserve(suppressed.size()); + for (size_t i = 0; i < suppressed.size(); ++i) { + if (suppressed[i] == 1) { + continue; + } + result->boxes.emplace_back(backup.boxes[i]); + result->scores.push_back(backup.scores[i]); + // landmarks (if have) + if (result->landmarks_per_face > 0) { + for (size_t j = 0; j < result->landmarks_per_face; ++j) { + result->landmarks.emplace_back( + backup.landmarks[i * result->landmarks_per_face + j]); + } + } + } +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/sort_det_res.cc b/libs/ultrainfer/ultrainfer/vision/utils/sort_det_res.cc new file mode 100755 index 0000000000..d55e98e7ad --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/sort_det_res.cc @@ -0,0 +1,187 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +void Merge(DetectionResult *result, size_t low, size_t mid, size_t high) { + std::vector> &boxes = result->boxes; + std::vector &scores = result->scores; + std::vector &label_ids = result->label_ids; + std::vector> temp_boxes(boxes); + std::vector temp_scores(scores); + std::vector temp_label_ids(label_ids); + size_t i = low; + size_t j = mid + 1; + size_t k = i; + // TODO(qiuyanjun): add masks process + for (; i <= mid && j <= high; k++) { + if (temp_scores[i] >= temp_scores[j]) { + scores[k] = temp_scores[i]; + label_ids[k] = temp_label_ids[i]; + boxes[k] = temp_boxes[i]; + i++; + } else { + scores[k] = temp_scores[j]; + label_ids[k] = temp_label_ids[j]; + boxes[k] = temp_boxes[j]; + j++; + } + } + while (i <= mid) { + scores[k] = temp_scores[i]; + label_ids[k] = temp_label_ids[i]; + boxes[k] = temp_boxes[i]; + k++; + i++; + } + while (j <= high) { + scores[k] = temp_scores[j]; + label_ids[k] = temp_label_ids[j]; + boxes[k] = temp_boxes[j]; + k++; + j++; + } +} + +void MergeSort(DetectionResult *result, size_t low, size_t high) { + if (low < high) { + size_t mid = (high - low) / 2 + low; + MergeSort(result, low, mid); + MergeSort(result, mid + 1, high); + Merge(result, low, mid, high); + } +} + +void SortDetectionResult(DetectionResult *result) { + size_t low = 0; + size_t high = result->scores.size(); + if (high == 0) { + return; + } + high = high - 1; + MergeSort(result, low, high); +} + +template +bool LexSortByXYCompare(const std::array &box_a, + const std::array &box_b) { + // WARN: The status shoule be false if (a==b). 
+ // https://blog.csdn.net/xxxwrq/article/details/83080640 + auto is_equal = [](const T &a, const T &b) -> bool { + return std::abs(a - b) < 1e-6f; + }; + const T &x0_a = box_a[0]; + const T &y0_a = box_a[1]; + const T &x0_b = box_b[0]; + const T &y0_b = box_b[1]; + if (is_equal(x0_a, x0_b)) { + return is_equal(y0_a, y0_b) ? false : y0_a > y0_b; + } + return x0_a > x0_b; +} + +// Only for int dtype +template <> +bool LexSortByXYCompare(const std::array &box_a, + const std::array &box_b) { + const int &x0_a = box_a[0]; + const int &y0_a = box_a[1]; + const int &x0_b = box_b[0]; + const int &y0_b = box_b[1]; + if (x0_a == x0_b) { + return y0_a == y0_b ? false : y0_a > y0_b; + } + return x0_a > x0_b; +} + +void ReorderDetectionResultByIndices(DetectionResult *result, + const std::vector &indices) { + // reorder boxes, scores, label_ids, masks + DetectionResult backup = (*result); + const bool contain_masks = backup.contain_masks; + const int boxes_num = backup.boxes.size(); + result->Clear(); + result->Resize(boxes_num); + // boxes, scores, labels_ids + for (int i = 0; i < boxes_num; ++i) { + result->boxes[i] = backup.boxes[indices[i]]; + result->scores[i] = backup.scores[indices[i]]; + result->label_ids[i] = backup.label_ids[indices[i]]; + } + if (contain_masks) { + result->contain_masks = true; + for (int i = 0; i < boxes_num; ++i) { + const auto &shape = backup.masks[indices[i]].shape; + const int mask_numel = shape[0] * shape[1]; + result->masks[i].shape = shape; + result->masks[i].Resize(mask_numel); + std::memcpy(result->masks[i].Data(), backup.masks[indices[i]].Data(), + mask_numel * sizeof(uint8_t)); + } + } +} + +void LexSortDetectionResultByXY(DetectionResult *result) { + if (result->boxes.empty()) { + return; + } + std::vector indices; + indices.resize(result->boxes.size()); + for (size_t i = 0; i < result->boxes.size(); ++i) { + indices[i] = i; + } + // lex sort by x(w) then y(h) + auto &boxes = result->boxes; + std::sort(indices.begin(), indices.end(), [&boxes](size_t a, size_t b) { + return LexSortByXYCompare(boxes[a], boxes[b]); + }); + ReorderDetectionResultByIndices(result, indices); +} + +void LexSortOCRDetResultByXY(std::vector> *result) { + if (result->empty()) { + return; + } + std::vector indices; + indices.resize(result->size()); + std::vector> boxes; + boxes.resize(result->size()); + for (size_t i = 0; i < result->size(); ++i) { + indices[i] = i; + // 4 points to 2 points for LexSort + boxes[i] = {(*result)[i][0], (*result)[i][1], (*result)[i][6], + (*result)[i][7]}; + } + // lex sort by x(w) then y(h) + std::sort(indices.begin(), indices.end(), [&boxes](size_t a, size_t b) { + return LexSortByXYCompare(boxes[a], boxes[b]); + }); + // reorder boxes + std::vector> backup = (*result); + const int boxes_num = backup.size(); + result->clear(); + result->resize(boxes_num); + // boxes + for (int i = 0; i < boxes_num; ++i) { + (*result)[i] = backup[indices[i]]; + } +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/sort_face_det_res.cc b/libs/ultrainfer/ultrainfer/vision/utils/sort_face_det_res.cc new file mode 100755 index 0000000000..31798a102c --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/sort_face_det_res.cc @@ -0,0 +1,69 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/utils/utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +void SortDetectionResult(FaceDetectionResult *result) { + // sort face detection results with landmarks or not. + if (result->boxes.size() == 0) { + return; + } + int landmarks_per_face = result->landmarks_per_face; + if (landmarks_per_face > 0) { + FDASSERT( + (result->landmarks.size() == result->boxes.size() * landmarks_per_face), + "The size of landmarks != boxes.size * landmarks_per_face."); + } + + // argsort for scores. + std::vector indices; + indices.resize(result->boxes.size()); + for (size_t i = 0; i < result->boxes.size(); ++i) { + indices[i] = i; + } + std::vector &scores = result->scores; + std::sort(indices.begin(), indices.end(), + [&scores](size_t a, size_t b) { return scores[a] > scores[b]; }); + + // reorder boxes, scores, landmarks (if have). + FaceDetectionResult backup(*result); + result->Clear(); + // don't forget to reset the landmarks_per_face + // before apply Reserve method. + result->landmarks_per_face = landmarks_per_face; + result->Reserve(indices.size()); + if (landmarks_per_face > 0) { + for (size_t i = 0; i < indices.size(); ++i) { + result->boxes.emplace_back(backup.boxes[indices[i]]); + result->scores.push_back(backup.scores[indices[i]]); + for (size_t j = 0; j < landmarks_per_face; ++j) { + result->landmarks.emplace_back( + backup.landmarks[indices[i] * landmarks_per_face + j]); + } + } + } else { + for (size_t i = 0; i < indices.size(); ++i) { + result->boxes.emplace_back(backup.boxes[indices[i]]); + result->scores.push_back(backup.scores[indices[i]]); + } + } +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/utils.h b/libs/ultrainfer/ultrainfer/vision/utils/utils.h new file mode 100755 index 0000000000..0c9342751d --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/utils.h @@ -0,0 +1,124 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
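+// Shared post-processing helpers for the vision models. The NMS declared in
+// this header uses the standard IoU criterion on [xmin, ymin, xmax, ymax]
+// boxes:
+//   IoU = intersection_area / (area_a + area_b - intersection_area)
+// and suppresses any box whose IoU with a higher-scoring kept box exceeds
+// iou_threshold.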
+
+#pragma once
+
+#include <algorithm>
+#include <set>
+#include <vector>
+
+#include "ultrainfer/core/fd_tensor.h"
+#include "ultrainfer/utils/utils.h"
+#include "ultrainfer/vision/common/result.h"
+
+// #include "unsupported/Eigen/CXX11/Tensor"
+#include "ultrainfer/function/reduce.h"
+#include "ultrainfer/function/softmax.h"
+#include "ultrainfer/function/transpose.h"
+#include "ultrainfer/vision/common/processors/mat.h"
+
+namespace ultrainfer {
+namespace vision {
+namespace utils {
+// topk is usually a very small value, so this simple O(topk * array_size)
+// implementation should not cost too much time.
+// Note that it assumes the minimum possible value is -99999999; do not use it
+// on arrays whose top-k values are smaller than -99999999.
+template <typename T>
+std::vector<int32_t> TopKIndices(const T *array, int array_size, int topk) {
+  topk = std::min(array_size, topk);
+  std::vector<int32_t> res(topk);
+  std::set<int32_t> searched;
+  for (int32_t i = 0; i < topk; ++i) {
+    T min = static_cast<T>(-99999999);
+    for (int32_t j = 0; j < array_size; ++j) {
+      if (searched.find(j) != searched.end()) {
+        continue;
+      }
+      if (*(array + j) > min) {
+        res[i] = j;
+        min = *(array + j);
+      }
+    }
+    searched.insert(res[i]);
+  }
+  return res;
+}
+
+void NMS(DetectionResult *output, float iou_threshold = 0.5,
+         std::vector<int> *index = nullptr);
+
+void NMS(FaceDetectionResult *result, float iou_threshold = 0.5);
+
+/// Sort DetectionResult/FaceDetectionResult by score
+ULTRAINFER_DECL void SortDetectionResult(DetectionResult *result);
+ULTRAINFER_DECL void SortDetectionResult(FaceDetectionResult *result);
+/// Lex Sort DetectionResult by x(w) & y(h) axis
+ULTRAINFER_DECL void LexSortDetectionResultByXY(DetectionResult *result);
+/// Lex Sort OCRDet Result by x(w) & y(h) axis
+ULTRAINFER_DECL void
+LexSortOCRDetResultByXY(std::vector<std::array<int, 8>> *result);
+
+/// L2 Norm / cosine similarity (for face recognition, ...)
+ULTRAINFER_DECL std::vector<float>
+L2Normalize(const std::vector<float> &values);
+
+ULTRAINFER_DECL float CosineSimilarity(const std::vector<float> &a,
+                                       const std::vector<float> &b,
+                                       bool normalized = true);
+
+/** \brief Do face alignment for models with five landmark points.
+ *
+ * \param[in] image The original image
+ * \param[in] result FaceDetectionResult
+ * \param[in] std_landmarks Standard face template
+ * \param[in] output_size The size of output mat
+ */
+ULTRAINFER_DECL std::vector<cv::Mat> AlignFaceWithFivePoints(
+    cv::Mat &image, FaceDetectionResult &result,
+    std::vector<std::array<float, 2>> std_landmarks = {{38.2946f, 51.6963f},
+                                                       {73.5318f, 51.5014f},
+                                                       {56.0252f, 71.7366f},
+                                                       {41.5493f, 92.3655f},
+                                                       {70.7299f, 92.2041f}},
+    std::array<int, 2> output_size = {112, 112});
+
+bool CropImageByBox(Mat &src_im, Mat *dst_im, const std::vector<float> &box,
+                    std::vector<float> *center, std::vector<float> *scale,
+                    const float expandratio = 0.3);
+
+/**
+ * Function: for keypoint detection models, fine positioning of keypoints in
+ * postprocess
+ * Parameters:
+ *   heatmap: model inference results for keypoint detection models
+ *   dim: shape information of the inference result
+ *   coords: coordinates after refined positioning
+ *   px: px = int(coords[ch * 2] + 0.5), refer to API
+ *       detection::GetFinalPredictions
+ *   py: py = int(coords[ch * 2 + 1] + 0.5), refer to API
+ *       detection::GetFinalPredictions
+ *   index: index information of heatmap pixels
+ *   ch: channel
+ * Paper reference: DARK post-processing, Zhang et al., Distribution-Aware
+ * Coordinate Representation for Human Pose Estimation (CVPR 2020).
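+ * In outline (per the DARK paper), the integer argmax location of each heatmap
+ * channel is refined with a second-order Taylor expansion of the heatmap
+ * around (px, py):
+ *   coord' = coord - H^{-1} * D
+ * where D is the first derivative and H the Hessian, both estimated from the
+ * neighboring heatmap values at the given index/channel.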
+ */ +void DarkParse(const std::vector &heatmap, const std::vector &dim, + std::vector *coords, const int px, const int py, + const int index, const int ch); + +} // namespace utils +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/utils/yolo_preprocess.cu b/libs/ultrainfer/ultrainfer/vision/utils/yolo_preprocess.cu new file mode 100755 index 0000000000..0ae81a63bd --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/utils/yolo_preprocess.cu @@ -0,0 +1,153 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Part of the following code in this file refs to +// https://github.com/wang-xinyu/tensorrtx/blob/yolov5-v6.0/yolov5/preprocess.cu +// +// Copyright (c) 2022 tensorrtx +// Licensed under The MIT License +// \file preprocess.cu +// \brief +// \author Qi Liu, Xinyu Wang + +#ifdef WITH_GPU +#include + +#include "ultrainfer/vision/utils/cuda_utils.h" + +namespace ultrainfer { +namespace vision { +namespace utils { + +struct AffineMatrix { + float value[6]; +}; + +__global__ void +YoloPreprocessCudaKernel(uint8_t *src, int src_line_size, int src_width, + int src_height, float *dst, int dst_width, + int dst_height, uint8_t padding_color_b, + uint8_t padding_color_g, uint8_t padding_color_r, + AffineMatrix d2s, int edge) { + int position = blockDim.x * blockIdx.x + threadIdx.x; + if (position >= edge) + return; + + float m_x1 = d2s.value[0]; + float m_y1 = d2s.value[1]; + float m_z1 = d2s.value[2]; + float m_x2 = d2s.value[3]; + float m_y2 = d2s.value[4]; + float m_z2 = d2s.value[5]; + + int dx = position % dst_width; + int dy = position / dst_width; + float src_x = m_x1 * dx + m_y1 * dy + m_z1 + 0.5f; + float src_y = m_x2 * dx + m_y2 * dy + m_z2 + 0.5f; + float c0, c1, c2; + + if (src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height) { + // out of range + c0 = padding_color_b; + c1 = padding_color_g; + c2 = padding_color_r; + } else { + int y_low = floorf(src_y); + int x_low = floorf(src_x); + int y_high = y_low + 1; + int x_high = x_low + 1; + + uint8_t const_value[] = {padding_color_b, padding_color_g, padding_color_r}; + float ly = src_y - y_low; + float lx = src_x - x_low; + float hy = 1 - ly; + float hx = 1 - lx; + float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + uint8_t *v1 = const_value; + uint8_t *v2 = const_value; + uint8_t *v3 = const_value; + uint8_t *v4 = const_value; + + if (y_low >= 0) { + if (x_low >= 0) + v1 = src + y_low * src_line_size + x_low * 3; + if (x_high < src_width) + v2 = src + y_low * src_line_size + x_high * 3; + } + + if (y_high < src_height) { + if (x_low >= 0) + v3 = src + y_high * src_line_size + x_low * 3; + if (x_high < src_width) + v4 = src + y_high * src_line_size + x_high * 3; + } + + c0 = w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0]; + c1 = w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1]; + c2 = w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2]; + } + + // bgr to rgb + float t = c2; + c2 
= c0; + c0 = t; + + // normalization + c0 = c0 / 255.0f; + c1 = c1 / 255.0f; + c2 = c2 / 255.0f; + + // rgbrgbrgb to rrrgggbbb + int area = dst_width * dst_height; + float *pdst_c0 = dst + dy * dst_width + dx; + float *pdst_c1 = pdst_c0 + area; + float *pdst_c2 = pdst_c1 + area; + *pdst_c0 = c0; + *pdst_c1 = c1; + *pdst_c2 = c2; +} + +void CudaYoloPreprocess(uint8_t *src, int src_width, int src_height, float *dst, + int dst_width, int dst_height, + const std::vector padding_value, + cudaStream_t stream) { + AffineMatrix s2d, d2s; + float scale = + std::min(dst_height / (float)src_height, dst_width / (float)src_width); + + s2d.value[0] = scale; + s2d.value[1] = 0; + s2d.value[2] = -scale * src_width * 0.5 + dst_width * 0.5; + s2d.value[3] = 0; + s2d.value[4] = scale; + s2d.value[5] = -scale * src_height * 0.5 + dst_height * 0.5; + + cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value); + cv::Mat m2x3_d2s(2, 3, CV_32F, d2s.value); + cv::invertAffineTransform(m2x3_s2d, m2x3_d2s); + + memcpy(d2s.value, m2x3_d2s.ptr(0), sizeof(d2s.value)); + + int jobs = dst_height * dst_width; + int threads = 256; + int blocks = ceil(jobs / (float)threads); + YoloPreprocessCudaKernel<<>>( + src, src_width * 3, src_width, src_height, dst, dst_width, dst_height, + padding_value[0], padding_value[1], padding_value[2], d2s, jobs); +} + +} // namespace utils +} // namespace vision +} // namespace ultrainfer +#endif diff --git a/libs/ultrainfer/ultrainfer/vision/vision_pybind.cc b/libs/ultrainfer/ultrainfer/vision/vision_pybind.cc new file mode 100755 index 0000000000..ad1a23a729 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/vision_pybind.cc @@ -0,0 +1,292 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
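+// Python bindings for the vision result types. Each result class below that
+// registers a pybind11::pickle pair (a tuple-producing getter and a
+// tuple-consuming setter) can be copied and serialized from Python with the
+// standard pickle/copy modules.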
+ +#include "ultrainfer/pybind/main.h" + +namespace ultrainfer { + +void BindFDMat(pybind11::module &m); +void BindFDMatBatch(pybind11::module &m); +void BindProcessors(pybind11::module &m); +void BindDetection(pybind11::module &m); +void BindClassification(pybind11::module &m); +void BindSegmentation(pybind11::module &m); +void BindMatting(pybind11::module &m); +void BindFaceDet(pybind11::module &m); +void BindFaceAlign(pybind11::module &m); +void BindFaceId(pybind11::module &m); +void BindOcr(pybind11::module &m); +void BindTracking(pybind11::module &m); +void BindKeyPointDetection(pybind11::module &m); +void BindHeadPose(pybind11::module &m); +void BindSR(pybind11::module &m); +void BindGeneration(pybind11::module &m); +void BindVisualize(pybind11::module &m); +void BindPerception(pybind11::module &m); + +void BindVision(pybind11::module &m) { + pybind11::class_(m, "Mask") + .def(pybind11::init()) + .def_readwrite("data", &vision::Mask::data) + .def_readwrite("shape", &vision::Mask::shape) + .def(pybind11::pickle( + [](const vision::Mask &m) { + return pybind11::make_tuple(m.data, m.shape); + }, + [](pybind11::tuple t) { + if (t.size() != 2) + throw std::runtime_error( + "vision::Mask pickle with invalid state!"); + + vision::Mask m; + m.data = t[0].cast>(); + m.shape = t[1].cast>(); + + return m; + })) + .def("__repr__", &vision::Mask::Str) + .def("__str__", &vision::Mask::Str); + + pybind11::class_(m, "ClassifyResult") + .def(pybind11::init()) + .def_readwrite("label_ids", &vision::ClassifyResult::label_ids) + .def_readwrite("scores", &vision::ClassifyResult::scores) + .def_readwrite("feature", &vision::ClassifyResult::feature) + .def(pybind11::pickle( + [](const vision::ClassifyResult &c) { + if (c.feature.empty()) { + return pybind11::make_tuple(c.label_ids, c.scores); + } + return pybind11::make_tuple(c.label_ids, c.scores, c.feature); + }, + [](pybind11::tuple t) { + if ((t.size() != 2) && (t.size() != 3)) { + throw std::runtime_error( + "vision::ClassifyResult pickle with invalid state!"); + } + + vision::ClassifyResult c; + c.label_ids = t[0].cast>(); + c.scores = t[1].cast>(); + if (t.size() == 3) { + c.feature = t[2].cast>(); + } + + return c; + })) + .def("__repr__", &vision::ClassifyResult::Str) + .def("__str__", &vision::ClassifyResult::Str); + + pybind11::class_(m, "DetectionResult") + .def(pybind11::init()) + .def_readwrite("boxes", &vision::DetectionResult::boxes) + .def_readwrite("scores", &vision::DetectionResult::scores) + .def_readwrite("rotated_boxes", &vision::DetectionResult::rotated_boxes) + .def_readwrite("label_ids", &vision::DetectionResult::label_ids) + .def_readwrite("masks", &vision::DetectionResult::masks) + .def_readwrite("contain_masks", &vision::DetectionResult::contain_masks) + .def(pybind11::pickle( + [](const vision::DetectionResult &d) { + return pybind11::make_tuple(d.boxes, d.scores, d.rotated_boxes, + d.label_ids, d.masks, d.contain_masks); + }, + [](pybind11::tuple t) { + if (t.size() != 5) + throw std::runtime_error( + "vision::DetectionResult pickle with Invalid state!"); + + vision::DetectionResult d; + d.boxes = t[0].cast>>(); + d.rotated_boxes = t[0].cast>>(); + d.scores = t[1].cast>(); + d.label_ids = t[2].cast>(); + d.masks = t[3].cast>(); + d.contain_masks = t[4].cast(); + + return d; + })) + .def("__repr__", &vision::DetectionResult::Str) + .def("__str__", &vision::DetectionResult::Str); + + pybind11::class_(m, "PerceptionResult") + .def(pybind11::init()) + .def_readwrite("valid", &vision::PerceptionResult::valid) + 
.def_readwrite("scores", &vision::PerceptionResult::scores) + .def_readwrite("label_ids", &vision::PerceptionResult::label_ids) + .def_readwrite("boxes", &vision::PerceptionResult::boxes) + .def_readwrite("center", &vision::PerceptionResult::center) + .def_readwrite("observation_angle", + &vision::PerceptionResult::observation_angle) + .def_readwrite("yaw_angle", &vision::PerceptionResult::yaw_angle) + .def_readwrite("velocity", &vision::PerceptionResult::velocity) + .def(pybind11::pickle( + [](const vision::PerceptionResult &d) { + return pybind11::make_tuple(d.scores, d.label_ids, d.boxes, + d.center, d.observation_angle, + d.yaw_angle, d.velocity); + }, + [](pybind11::tuple t) { + if (t.size() != 7) + throw std::runtime_error( + "vision::PerceptionResult pickle with Invalid state!"); + + vision::PerceptionResult d; + d.scores = t[0].cast>(); + d.label_ids = t[1].cast>(); + d.boxes = t[2].cast>>(); + d.center = t[3].cast>>(); + d.observation_angle = t[4].cast>(); + d.yaw_angle = t[5].cast>(); + d.velocity = t[6].cast>>(); + return d; + })) + .def("__repr__", &vision::PerceptionResult::Str) + .def("__str__", &vision::PerceptionResult::Str); + + pybind11::class_(m, "OCRResult") + .def(pybind11::init()) + .def_readwrite("boxes", &vision::OCRResult::boxes) + .def_readwrite("text", &vision::OCRResult::text) + .def_readwrite("rec_scores", &vision::OCRResult::rec_scores) + .def_readwrite("cls_scores", &vision::OCRResult::cls_scores) + .def_readwrite("cls_labels", &vision::OCRResult::cls_labels) + .def_readwrite("table_boxes", &vision::OCRResult::table_boxes) + .def_readwrite("table_structure", &vision::OCRResult::table_structure) + .def_readwrite("table_html", &vision::OCRResult::table_html) + .def("__repr__", &vision::OCRResult::Str) + .def("__str__", &vision::OCRResult::Str); + + pybind11::class_(m, "OCRCURVEResult") + .def(pybind11::init()) + .def_readwrite("boxes", &vision::OCRCURVEResult::boxes) + .def_readwrite("text", &vision::OCRCURVEResult::text) + .def_readwrite("rec_scores", &vision::OCRCURVEResult::rec_scores) + .def_readwrite("cls_scores", &vision::OCRCURVEResult::cls_scores) + .def_readwrite("cls_labels", &vision::OCRCURVEResult::cls_labels) + .def_readwrite("table_boxes", &vision::OCRCURVEResult::table_boxes) + .def_readwrite("table_structure", + &vision::OCRCURVEResult::table_structure) + .def_readwrite("table_html", &vision::OCRCURVEResult::table_html) + .def("__repr__", &vision::OCRCURVEResult::Str) + .def("__str__", &vision::OCRCURVEResult::Str); + + pybind11::class_(m, "MOTResult") + .def(pybind11::init()) + .def_readwrite("boxes", &vision::MOTResult::boxes) + .def_readwrite("ids", &vision::MOTResult::ids) + .def_readwrite("scores", &vision::MOTResult::scores) + .def_readwrite("class_ids", &vision::MOTResult::class_ids) + .def("__repr__", &vision::MOTResult::Str) + .def("__str__", &vision::MOTResult::Str); + + pybind11::class_(m, "FaceDetectionResult") + .def(pybind11::init()) + .def_readwrite("boxes", &vision::FaceDetectionResult::boxes) + .def_readwrite("scores", &vision::FaceDetectionResult::scores) + .def_readwrite("landmarks", &vision::FaceDetectionResult::landmarks) + .def_readwrite("landmarks_per_face", + &vision::FaceDetectionResult::landmarks_per_face) + .def("__repr__", &vision::FaceDetectionResult::Str) + .def("__str__", &vision::FaceDetectionResult::Str); + + pybind11::class_(m, "FaceAlignmentResult") + .def(pybind11::init()) + .def_readwrite("landmarks", &vision::FaceAlignmentResult::landmarks) + .def("__repr__", &vision::FaceAlignmentResult::Str) + 
.def("__str__", &vision::FaceAlignmentResult::Str); + + pybind11::class_(m, "FaceRecognitionResult") + .def(pybind11::init()) + .def_readwrite("embedding", &vision::FaceRecognitionResult::embedding) + .def("__repr__", &vision::FaceRecognitionResult::Str) + .def("__str__", &vision::FaceRecognitionResult::Str); + + pybind11::class_(m, "SegmentationResult") + .def(pybind11::init()) + .def_readwrite("label_map", &vision::SegmentationResult::label_map) + .def_readwrite("score_map", &vision::SegmentationResult::score_map) + .def_readwrite("shape", &vision::SegmentationResult::shape) + .def_readwrite("contain_score_map", + &vision::SegmentationResult::contain_score_map) + .def(pybind11::pickle( + [](const vision::SegmentationResult &s) { + return pybind11::make_tuple(s.label_map, s.score_map, s.shape, + s.contain_score_map); + }, + [](pybind11::tuple t) { + if (t.size() != 4) + throw std::runtime_error( + "vision::SegmentationResult pickle with Invalid state!"); + + vision::SegmentationResult s; + s.label_map = t[0].cast>(); + s.score_map = t[1].cast>(); + s.shape = t[2].cast>(); + s.contain_score_map = t[3].cast(); + + return s; + })) + .def("__repr__", &vision::SegmentationResult::Str) + .def("__str__", &vision::SegmentationResult::Str); + + pybind11::class_(m, "MattingResult") + .def(pybind11::init()) + .def_readwrite("alpha", &vision::MattingResult::alpha) + .def_readwrite("foreground", &vision::MattingResult::foreground) + .def_readwrite("shape", &vision::MattingResult::shape) + .def_readwrite("contain_foreground", + &vision::MattingResult::contain_foreground) + .def("__repr__", &vision::MattingResult::Str) + .def("__str__", &vision::MattingResult::Str); + + pybind11::class_(m, + "KeyPointDetectionResult") + .def(pybind11::init()) + .def_readwrite("keypoints", &vision::KeyPointDetectionResult::keypoints) + .def_readwrite("scores", &vision::KeyPointDetectionResult::scores) + .def_readwrite("num_joints", &vision::KeyPointDetectionResult::num_joints) + .def("__repr__", &vision::KeyPointDetectionResult::Str) + .def("__str__", &vision::KeyPointDetectionResult::Str); + + pybind11::class_(m, "HeadPoseResult") + .def(pybind11::init()) + .def_readwrite("euler_angles", &vision::HeadPoseResult::euler_angles) + .def("__repr__", &vision::HeadPoseResult::Str) + .def("__str__", &vision::HeadPoseResult::Str); + + m.def("enable_flycv", &vision::EnableFlyCV, + "Enable image preprocessing by FlyCV."); + m.def("disable_flycv", &vision::DisableFlyCV, + "Disable image preprocessing by FlyCV, change to use OpenCV."); + + BindFDMat(m); + BindFDMatBatch(m); + BindProcessors(m); + BindDetection(m); + BindClassification(m); + BindSegmentation(m); + BindFaceDet(m); + BindFaceAlign(m); + BindFaceId(m); + BindMatting(m); + BindOcr(m); + BindTracking(m); + BindKeyPointDetection(m); + BindHeadPose(m); + BindSR(m); + BindGeneration(m); + BindVisualize(m); + BindPerception(m); +} +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/classification.cc b/libs/ultrainfer/ultrainfer/vision/visualize/classification.cc new file mode 100755 index 0000000000..10502ce345 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/classification.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisClassification(const cv::Mat &im, const ClassifyResult &result, + int top_k, float score_threshold, float font_size) { + int h = im.rows; + int w = im.cols; + auto vis_im = im.clone(); + int h_sep = h / 30; + int w_sep = w / 10; + if (top_k > result.scores.size()) { + top_k = result.scores.size(); + } + for (int i = 0; i < top_k; ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + "," + score; + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Point origin; + origin.x = w_sep; + origin.y = h_sep * (i + 1); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + } + return vis_im; +} + +// Visualize ClassifyResult with custom labels. +cv::Mat VisClassification(const cv::Mat &im, const ClassifyResult &result, + const std::vector &labels, int top_k, + float score_threshold, float font_size) { + int h = im.rows; + int w = im.cols; + auto vis_im = im.clone(); + int h_sep = h / 30; + int w_sep = w / 10; + if (top_k > result.scores.size()) { + top_k = result.scores.size(); + } + for (int i = 0; i < top_k; ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + "," + score; + if (labels.size() > result.label_ids[i]) { + text = labels[result.label_ids[i]] + "," + text; + } else { + FDWARNING << "The label_id: " << result.label_ids[i] + << " in DetectionResult should be less than length of labels:" + << labels.size() << "." << std::endl; + } + if (text.size() > 16) { + text = text.substr(0, 16); + } + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Point origin; + origin.x = w_sep; + origin.y = h_sep * (i + 1); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + } + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/detection.cc b/libs/ultrainfer/ultrainfer/vision/visualize/detection.cc new file mode 100755 index 0000000000..2cb71a46e9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/detection.cc @@ -0,0 +1,374 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisDetection(const cv::Mat &im, const DetectionResult &result, + float score_threshold, int line_size, float font_size) { + if (result.boxes.empty() && result.rotated_boxes.empty()) { + return im; + } + if (result.contain_masks) { + FDASSERT(result.boxes.size() == result.masks.size(), + "The size of masks must be equal to the size of boxes, but now " + "%zu != %zu.", + result.boxes.size(), result.masks.size()); + } + int max_label_id = + *std::max_element(result.label_ids.begin(), result.label_ids.end()); + std::vector color_map = GenerateColorMap(max_label_id); + + int h = im.rows; + int w = im.cols; + auto vis_im = im.clone(); + for (size_t i = 0; i < result.rotated_boxes.size(); ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + + int c0 = color_map[3 * result.label_ids[i] + 0]; + int c1 = color_map[3 * result.label_ids[i] + 1]; + int c2 = color_map[3 * result.label_ids[i] + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + ", " + score; + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + + for (int j = 0; j < 4; j++) { + auto start = cv::Point( + static_cast(round(result.rotated_boxes[i][2 * j])), + static_cast(round(result.rotated_boxes[i][2 * j + 1]))); + + cv::Point end; + if (j != 3) { + end = cv::Point( + static_cast(round(result.rotated_boxes[i][2 * (j + 1)])), + static_cast(round(result.rotated_boxes[i][2 * (j + 1) + 1]))); + } else { + end = cv::Point(static_cast(round(result.rotated_boxes[i][0])), + static_cast(round(result.rotated_boxes[i][1]))); + cv::putText(vis_im, text, end, font, font_size, + cv::Scalar(255, 255, 255), 1); + } + cv::line(vis_im, start, end, cv::Scalar(255, 255, 255), 3, cv::LINE_AA, + 0); + } + } + + for (size_t box_i = 0; box_i < result.boxes.size(); ++box_i) { + if (result.scores[box_i] < score_threshold) { + continue; + } + int x1 = static_cast(round(result.boxes[box_i][0])); + int y1 = static_cast(round(result.boxes[box_i][1])); + int x2 = static_cast(round(result.boxes[box_i][2])); + int y2 = static_cast(round(result.boxes[box_i][3])); + int box_h = y2 - y1; + int box_w = x2 - x1; + int c0 = color_map[3 * result.label_ids[box_i] + 0]; + int c1 = color_map[3 * result.label_ids[box_i] + 1]; + int c2 = color_map[3 * result.label_ids[box_i] + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string id = std::to_string(result.label_ids[box_i]); + std::string score = std::to_string(result.scores[box_i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + ", " + score; + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + cv::Point origin; + origin.x = x1; + origin.y = y1; + cv::Rect rect(x1, y1, box_w, box_h); + cv::rectangle(vis_im, rect, rect_color, line_size); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + if (result.contain_masks) { + int mask_h = static_cast(result.masks[box_i].shape[0]); + int mask_w = static_cast(result.masks[box_i].shape[1]); + // non-const 
pointer for cv:Mat constructor + uint32_t *mask_raw_data = const_cast( + static_cast(result.masks[box_i].Data())); + // only reference to mask data (zero copy) + cv::Mat mask(mask_h, mask_w, CV_32SC1, mask_raw_data); + if ((mask_h != box_h) || (mask_w != box_w)) { + cv::resize(mask, mask, cv::Size(box_w, box_h)); + } + // use a bright color for instance mask + int mc0 = 255 - c0 >= 127 ? 255 - c0 : 127; + int mc1 = 255 - c1 >= 127 ? 255 - c1 : 127; + int mc2 = 255 - c2 >= 127 ? 255 - c2 : 127; + uint32_t *mask_data = reinterpret_cast(mask.data); + // inplace blending (zero copy) + uchar *vis_im_data = static_cast(vis_im.data); + for (size_t i = y1; i < y2; ++i) { + for (size_t j = x1; j < x2; ++j) { + if (mask_data[(i - y1) * mask_w + (j - x1)] != 0) { + vis_im_data[i * w * 3 + j * 3 + 0] = cv::saturate_cast( + static_cast(mc0) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 0]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 1] = cv::saturate_cast( + static_cast(mc1) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 1]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 2] = cv::saturate_cast( + static_cast(mc2) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 2]) * 0.5f); + } + } + } + } + } + return vis_im; +} + +// Visualize DetectionResult with custom labels. +cv::Mat VisDetection(const cv::Mat &im, const DetectionResult &result, + const std::vector &labels, + float score_threshold, int line_size, float font_size, + std::vector font_color, int font_thickness) { + if (result.boxes.empty()) { + return im; + } + if (result.contain_masks) { + FDASSERT(result.boxes.size() == result.masks.size(), + "The size of masks must be equal to the size of boxes, but now " + "%zu != %zu.", + result.boxes.size(), result.masks.size()); + } + int max_label_id = + *std::max_element(result.label_ids.begin(), result.label_ids.end()); + std::vector color_map = GenerateColorMap(max_label_id); + + int h = im.rows; + int w = im.cols; + auto vis_im = im.clone(); + auto font_color_ = cv::Scalar(font_color[0], font_color[1], font_color[2]); + for (size_t i = 0; i < result.rotated_boxes.size(); ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + + int c0 = color_map[3 * result.label_ids[i] + 0]; + int c1 = color_map[3 * result.label_ids[i] + 1]; + int c2 = color_map[3 * result.label_ids[i] + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + ", " + score; + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + + for (int j = 0; j < 4; j++) { + auto start = cv::Point( + static_cast(round(result.rotated_boxes[i][2 * j])), + static_cast(round(result.rotated_boxes[i][2 * j + 1]))); + + cv::Point end; + if (j == 3) { + end = cv::Point( + static_cast(round(result.rotated_boxes[i][2 * j])), + static_cast(round(result.rotated_boxes[i][2 * j + 1]))); + } else { + end = cv::Point(static_cast(round(result.rotated_boxes[i][0])), + static_cast(round(result.rotated_boxes[i][1]))); + cv::putText(vis_im, text, end, font, font_size, font_color_, + font_thickness); + } + cv::line(vis_im, start, end, cv::Scalar(255, 255, 255), 3, cv::LINE_AA, + 0); + } + } + for (size_t i = 0; i < result.boxes.size(); ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + int x1 = static_cast(result.boxes[i][0]); + int y1 = static_cast(result.boxes[i][1]); + 
int x2 = static_cast(result.boxes[i][2]); + int y2 = static_cast(result.boxes[i][3]); + int box_h = y2 - y1; + int box_w = x2 - x1; + int c0 = color_map[3 * result.label_ids[i] + 0]; + int c1 = color_map[3 * result.label_ids[i] + 1]; + int c2 = color_map[3 * result.label_ids[i] + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + "," + score; + if (labels.size() > result.label_ids[i]) { + text = labels[result.label_ids[i]] + "," + text; + } else { + FDWARNING << "The label_id: " << result.label_ids[i] + << " in DetectionResult should be less than length of labels:" + << labels.size() << "." << std::endl; + } + if (text.size() > 16) { + text = text.substr(0, 16); + } + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + cv::Point origin; + origin.x = x1; + origin.y = y1; + cv::Rect rect(x1, y1, box_w, box_h); + cv::rectangle(vis_im, rect, rect_color, line_size); + cv::putText(vis_im, text, origin, font, font_size, font_color_, + font_thickness); + if (result.contain_masks) { + int mask_h = static_cast(result.masks[i].shape[0]); + int mask_w = static_cast(result.masks[i].shape[1]); + // non-const pointer for cv:Mat constructor + int32_t *mask_raw_data = const_cast( + static_cast(result.masks[i].Data())); + // only reference to mask data (zero copy) + cv::Mat mask(mask_h, mask_w, CV_32SC1, mask_raw_data); + if ((mask_h != box_h) || (mask_w != box_w)) { + cv::resize(mask, mask, cv::Size(box_w, box_h)); + } + // use a bright color for instance mask + int mc0 = 255 - c0 >= 127 ? 255 - c0 : 127; + int mc1 = 255 - c1 >= 127 ? 255 - c1 : 127; + int mc2 = 255 - c2 >= 127 ? 255 - c2 : 127; + int32_t *mask_data = reinterpret_cast(mask.data); + // inplace blending (zero copy) + uchar *vis_im_data = static_cast(vis_im.data); + for (size_t i = y1; i < y2; ++i) { + for (size_t j = x1; j < x2; ++j) { + if (mask_data[(i - y1) * mask_w + (j - x1)] != 0) { + vis_im_data[i * w * 3 + j * 3 + 0] = cv::saturate_cast( + static_cast(mc0) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 0]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 1] = cv::saturate_cast( + static_cast(mc1) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 1]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 2] = cv::saturate_cast( + static_cast(mc2) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 2]) * 0.5f); + } + } + } + } + } + return vis_im; +} + +// Default only support visualize num_classes <= 1000 +// If need to visualize num_classes > 1000 +// Please call Visualize::GetColorMap(num_classes) first +cv::Mat Visualize::VisDetection(const cv::Mat &im, + const DetectionResult &result, + float score_threshold, int line_size, + float font_size) { + if (result.boxes.empty()) { + return im; + } + FDWARNING << "DEPRECATED: ultrainfer::vision::Visualize::VisDetection is " + "deprecated, please use ultrainfer::vision:VisDetection " + "function instead." 
+ << std::endl; + if (result.contain_masks) { + FDASSERT(result.boxes.size() == result.masks.size(), + "The size of masks must be equal the size of boxes!"); + } + auto color_map = GetColorMap(); + int h = im.rows; + int w = im.cols; + auto vis_im = im.clone(); + for (size_t i = 0; i < result.boxes.size(); ++i) { + if (result.scores[i] < score_threshold) { + continue; + } + int x1 = static_cast(result.boxes[i][0]); + int y1 = static_cast(result.boxes[i][1]); + int x2 = static_cast(result.boxes[i][2]); + int y2 = static_cast(result.boxes[i][3]); + int box_h = y2 - y1; + int box_w = x2 - x1; + int c0 = color_map[3 * result.label_ids[i] + 0]; + int c1 = color_map[3 * result.label_ids[i] + 1]; + int c2 = color_map[3 * result.label_ids[i] + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string id = std::to_string(result.label_ids[i]); + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = id + "," + score; + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + cv::Point origin; + origin.x = x1; + origin.y = y1; + cv::Rect rect(x1, y1, box_w, box_h); + cv::rectangle(vis_im, rect, rect_color, line_size); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + if (result.contain_masks) { + int mask_h = static_cast(result.masks[i].shape[0]); + int mask_w = static_cast(result.masks[i].shape[1]); + // non-const pointer for cv:Mat constructor + int32_t *mask_raw_data = const_cast( + static_cast(result.masks[i].Data())); + // only reference to mask data (zero copy) + cv::Mat mask(mask_h, mask_w, CV_32SC1, mask_raw_data); + if ((mask_h != box_h) || (mask_w != box_w)) { + cv::resize(mask, mask, cv::Size(box_w, box_h)); + } + // use a bright color for instance mask + int mc0 = 255 - c0 >= 127 ? 255 - c0 : 127; + int mc1 = 255 - c1 >= 127 ? 255 - c1 : 127; + int mc2 = 255 - c2 >= 127 ? 255 - c2 : 127; + int32_t *mask_data = reinterpret_cast(mask.data); + // inplace blending (zero copy) + uchar *vis_im_data = static_cast(vis_im.data); + for (size_t i = y1; i < y2; ++i) { + for (size_t j = x1; j < x2; ++j) { + if (mask_data[(i - y1) * mask_w + (j - x1)] != 0) { + vis_im_data[i * w * 3 + j * 3 + 0] = cv::saturate_cast( + static_cast(mc0) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 0]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 1] = cv::saturate_cast( + static_cast(mc1) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 1]) * 0.5f); + vis_im_data[i * w * 3 + j * 3 + 2] = cv::saturate_cast( + static_cast(mc2) * 0.5f + + static_cast(vis_im_data[i * w * 3 + j * 3 + 2]) * 0.5f); + } + } + } + } + } + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/face_alignment.cc b/libs/ultrainfer/ultrainfer/vision/visualize/face_alignment.cc new file mode 100755 index 0000000000..8f676d7f24 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/face_alignment.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { + +namespace vision { + +cv::Mat VisFaceAlignment(const cv::Mat &im, const FaceAlignmentResult &result, + int line_size) { + auto vis_im = im.clone(); + // vis landmarks + cv::Scalar landmark_color = cv::Scalar(0, 255, 0); + for (size_t i = 0; i < result.landmarks.size(); ++i) { + cv::Point landmark; + landmark.x = static_cast(result.landmarks[i][0]); + landmark.y = static_cast(result.landmarks[i][1]); + cv::circle(vis_im, landmark, line_size, landmark_color, -1); + } + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/face_detection.cc b/libs/ultrainfer/ultrainfer/vision/visualize/face_detection.cc new file mode 100755 index 0000000000..a2ed40e6df --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/face_detection.cc @@ -0,0 +1,137 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
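+// Note: FaceDetectionResult stores landmarks as a flat list, so the j-th
+// landmark of the i-th face lives at index i * landmarks_per_face + j; the
+// drawing loops below rely on this layout.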
+ +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { + +namespace vision { + +cv::Mat VisFaceDetection(const cv::Mat &im, const FaceDetectionResult &result, + int line_size, float font_size) { + auto color_map = GenerateColorMap(); + int h = im.rows; + int w = im.cols; + + auto vis_im = im.clone(); + bool vis_landmarks = false; + if ((result.landmarks_per_face > 0) && + (result.boxes.size() * result.landmarks_per_face == + result.landmarks.size())) { + vis_landmarks = true; + } + for (size_t i = 0; i < result.boxes.size(); ++i) { + cv::Rect rect(result.boxes[i][0], result.boxes[i][1], + result.boxes[i][2] - result.boxes[i][0], + result.boxes[i][3] - result.boxes[i][1]); + int color_id = i % 333; + int c0 = color_map[3 * color_id + 0]; + int c1 = color_map[3 * color_id + 1]; + int c2 = color_map[3 * color_id + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string text = std::to_string(result.scores[i]); + if (text.size() > 4) { + text = text.substr(0, 4); + } + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + cv::Point origin; + origin.x = rect.x; + origin.y = rect.y; + cv::Rect text_background = + cv::Rect(result.boxes[i][0], result.boxes[i][1] - text_size.height, + text_size.width, text_size.height); + cv::rectangle(vis_im, rect, rect_color, line_size); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + // vis landmarks (if have) + if (vis_landmarks) { + cv::Scalar landmark_color = rect_color; + for (size_t j = 0; j < result.landmarks_per_face; ++j) { + cv::Point landmark; + landmark.x = static_cast( + result.landmarks[i * result.landmarks_per_face + j][0]); + landmark.y = static_cast( + result.landmarks[i * result.landmarks_per_face + j][1]); + cv::circle(vis_im, landmark, line_size, landmark_color, -1); + } + } + } + return vis_im; +} + +// Default only support visualize num_classes <= 1000 +// If need to visualize num_classes > 1000 +// Please call Visualize::GetColorMap(num_classes) first +cv::Mat Visualize::VisFaceDetection(const cv::Mat &im, + const FaceDetectionResult &result, + int line_size, float font_size) { + FDWARNING << "DEPRECATED: ultrainfer::vision::Visualize::VisFaceDetection is " + "deprecated, please use ultrainfer::vision:VisFaceDetection " + "function instead." 
+ << std::endl; + auto color_map = GetColorMap(); + int h = im.rows; + int w = im.cols; + + auto vis_im = im.clone(); + bool vis_landmarks = false; + if ((result.landmarks_per_face > 0) && + (result.boxes.size() * result.landmarks_per_face == + result.landmarks.size())) { + vis_landmarks = true; + } + for (size_t i = 0; i < result.boxes.size(); ++i) { + cv::Rect rect(result.boxes[i][0], result.boxes[i][1], + result.boxes[i][2] - result.boxes[i][0], + result.boxes[i][3] - result.boxes[i][1]); + int color_id = i % 333; + int c0 = color_map[3 * color_id + 0]; + int c1 = color_map[3 * color_id + 1]; + int c2 = color_map[3 * color_id + 2]; + cv::Scalar rect_color = cv::Scalar(c0, c1, c2); + std::string text = std::to_string(result.scores[i]); + if (text.size() > 4) { + text = text.substr(0, 4); + } + int font = cv::FONT_HERSHEY_SIMPLEX; + cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); + cv::Point origin; + origin.x = rect.x; + origin.y = rect.y; + cv::Rect text_background = + cv::Rect(result.boxes[i][0], result.boxes[i][1] - text_size.height, + text_size.width, text_size.height); + cv::rectangle(vis_im, rect, rect_color, line_size); + cv::putText(vis_im, text, origin, font, font_size, + cv::Scalar(255, 255, 255), 1); + // vis landmarks (if have) + if (vis_landmarks) { + cv::Scalar landmark_color = rect_color; + for (size_t j = 0; j < result.landmarks_per_face; ++j) { + cv::Point landmark; + landmark.x = static_cast( + result.landmarks[i * result.landmarks_per_face + j][0]); + landmark.y = static_cast( + result.landmarks[i * result.landmarks_per_face + j][1]); + cv::circle(vis_im, landmark, line_size, landmark_color, -1); + } + } + } + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/headpose.cc b/libs/ultrainfer/ultrainfer/vision/visualize/headpose.cc new file mode 100755 index 0000000000..4ba2420992 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/headpose.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
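+// VisHeadPose draws the three rotated unit axes of the head (from the
+// pitch/yaw/roll Euler angles, converted from degrees to radians), scaled by
+// `size` and anchored at the image center (tdx, tdy): X in red, Y in green,
+// Z in blue.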
+ +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { + +namespace vision { + +cv::Mat VisHeadPose(const cv::Mat &im, const HeadPoseResult &result, int size, + int line_size) { + const float PI = 3.1415926535; + auto vis_im = im.clone(); + int h = im.rows; + int w = im.cols; + // vis headpose + float pitch = result.euler_angles[0] * PI / 180.f; + float yaw = -result.euler_angles[1] * PI / 180.f; + float roll = result.euler_angles[2] * PI / 180.f; + + int tdx = w / 2; + int tdy = h / 2; + + // X-Axis | drawn in red + int x1 = static_cast(size * std::cos(yaw) * std::cos(roll)) + tdx; + int y1 = static_cast( + size * (std::cos(pitch) * std::sin(roll) + + std::cos(roll) * std::sin(pitch) * std::sin(yaw))) + + tdy; + // Y-Axis | drawn in green + int x2 = static_cast(-size * std::cos(yaw) * std::sin(roll)) + tdx; + int y2 = static_cast( + size * (std::cos(pitch) * std::cos(roll) - + std::sin(pitch) * std::sin(yaw) * std::sin(roll))) + + tdy; + // Z-Axis | drawn in blue + int x3 = static_cast(size * std::sin(yaw)) + tdx; + int y3 = static_cast(-size * std::cos(yaw) * std::sin(pitch)) + tdy; + + cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x1, y1), + cv::Scalar(0, 0, 255), line_size); + cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x2, y2), + cv::Scalar(0, 255, 0), line_size); + cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x3, y3), + cv::Scalar(255, 0, 0), line_size); + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/keypoint.cc b/libs/ultrainfer/ultrainfer/vision/visualize/keypoint.cc new file mode 100755 index 0000000000..20c124ba5b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/keypoint.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
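+// VisKeypointDetection assumes the 17-keypoint COCO layout: keypoints are
+// read 17 at a time per detection, and `edge` lists the joint index pairs
+// that form the skeleton lines.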
+#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisKeypointDetection(const cv::Mat &im, + const KeyPointDetectionResult &results, + float conf_threshold) { + const int edge[][2] = {{0, 1}, {0, 2}, {1, 3}, {2, 4}, {3, 5}, + {4, 6}, {5, 7}, {6, 8}, {7, 9}, {8, 10}, + {5, 11}, {6, 12}, {11, 13}, {12, 14}, {13, 15}, + {14, 16}, {11, 12}}; + auto colormap = GenerateColorMap(); + cv::Mat vis_img = im.clone(); + int detection_nums = results.keypoints.size() / 17; + for (int i = 0; i < detection_nums; i++) { + int index = i * 17; + bool is_over_threshold = true; + for (int j = 0; j < results.num_joints; j++) { + if (results.scores[index + j] < conf_threshold) { + is_over_threshold = false; + break; + } + } + if (is_over_threshold) { + for (int k = 0; k < results.num_joints; k++) { + int x_coord = int(results.keypoints[index + k][0]); + int y_coord = int(results.keypoints[index + k][1]); + cv::circle(vis_img, cv::Point2d(x_coord, y_coord), 1, + cv::Scalar(0, 0, 255), 2); + int x_start = int(results.keypoints[index + edge[k][0]][0]); + int y_start = int(results.keypoints[index + edge[k][0]][1]); + int x_end = int(results.keypoints[index + edge[k][1]][0]); + int y_end = int(results.keypoints[index + edge[k][1]][1]); + cv::line(vis_img, cv::Point2d(x_start, y_start), + cv::Point2d(x_end, y_end), colormap[k], 1); + } + } + } + return vis_img; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/matting.cc b/libs/ultrainfer/ultrainfer/vision/visualize/matting.cc new file mode 100755 index 0000000000..03ef71d802 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/matting.cc @@ -0,0 +1,152 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
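+// VisMatting blends every pixel with the predicted alpha:
+//   vis = alpha * image_pixel + (1 - alpha) * background_color
+// using BGR(153, 255, 120) as the default background; when
+// transparent_background is set, pixels with alpha below transparent_threshold
+// get a zero alpha channel instead.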
+#include "opencv2/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisMatting(const cv::Mat &im, const MattingResult &result, + bool transparent_background, float transparent_threshold, + bool remove_small_connected_area) { + FDASSERT((!im.empty()), "im can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels mat!"); + auto vis_img = im.clone(); + cv::Mat transparent_vis_mat; + int channel = im.channels(); + int out_h = static_cast(result.shape[0]); + int out_w = static_cast(result.shape[1]); + int height = im.rows; + int width = im.cols; + std::vector alpha_copy; + alpha_copy.assign(result.alpha.begin(), result.alpha.end()); + float *alpha_ptr = static_cast(alpha_copy.data()); + cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr); + if (remove_small_connected_area) { + alpha = RemoveSmallConnectedArea(alpha, 0.05f); + } + if ((out_h != height) || (out_w != width)) { + cv::resize(alpha, alpha, cv::Size(width, height)); + } + + if ((vis_img).type() != CV_8UC3) { + (vis_img).convertTo((vis_img), CV_8UC3); + } + + if (transparent_background) { + if (vis_img.channels() != 4) { + cv::cvtColor(vis_img, transparent_vis_mat, cv::COLOR_BGR2BGRA); + vis_img = transparent_vis_mat; + channel = 4; + } + } + + uchar *vis_data = static_cast(vis_img.data); + uchar *im_data = static_cast(im.data); + float *alpha_data = reinterpret_cast(alpha.data); + + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + float alpha_val = alpha_data[i * width + j]; + if (transparent_background) { + if (alpha_val < transparent_threshold) { + vis_data[i * width * channel + j * channel + 3] = + cv::saturate_cast(0.f); + } else { + vis_data[i * width * channel + j * channel + 0] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 0])); + vis_data[i * width * channel + j * channel + 1] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 1])); + vis_data[i * width * channel + j * channel + 2] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 2])); + } + } else { + vis_data[i * width * channel + j * channel + 0] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 0]) * + alpha_val + + (1.f - alpha_val) * 153.f); + vis_data[i * width * channel + j * channel + 1] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 1]) * + alpha_val + + (1.f - alpha_val) * 255.f); + vis_data[i * width * channel + j * channel + 2] = + cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 2]) * + alpha_val + + (1.f - alpha_val) * 120.f); + } + } + } + return vis_img; +} + +cv::Mat Visualize::VisMattingAlpha(const cv::Mat &im, + const MattingResult &result, + bool remove_small_connected_area) { + FDWARNING << "DEPRECATED: ultrainfer::vision::Visualize::VisMattingAlpha is " + "deprecated, please use ultrainfer::vision:VisMatting function " + "instead." 
+ << std::endl; + FDASSERT((!im.empty()), "im can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels mat!"); + + auto vis_img = im.clone(); + int out_h = static_cast(result.shape[0]); + int out_w = static_cast(result.shape[1]); + int height = im.rows; + int width = im.cols; + std::vector alpha_copy; + alpha_copy.assign(result.alpha.begin(), result.alpha.end()); + float *alpha_ptr = static_cast(alpha_copy.data()); + cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr); + if (remove_small_connected_area) { + alpha = RemoveSmallConnectedArea(alpha, 0.05f); + } + if ((out_h != height) || (out_w != width)) { + cv::resize(alpha, alpha, cv::Size(width, height)); + } + + if ((vis_img).type() != CV_8UC3) { + (vis_img).convertTo((vis_img), CV_8UC3); + } + + uchar *vis_data = static_cast(vis_img.data); + uchar *im_data = static_cast(im.data); + float *alpha_data = reinterpret_cast(alpha.data); + + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + float alpha_val = alpha_data[i * width + j]; + vis_data[i * width * 3 + j * 3 + 0] = cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 0]) * alpha_val + + (1.f - alpha_val) * 153.f); + vis_data[i * width * 3 + j * 3 + 1] = cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 1]) * alpha_val + + (1.f - alpha_val) * 255.f); + vis_data[i * width * 3 + j * 3 + 2] = cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + 2]) * alpha_val + + (1.f - alpha_val) * 120.f); + } + } + return vis_img; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/mot.cc b/libs/ultrainfer/ultrainfer/vision/visualize/mot.cc new file mode 100755 index 0000000000..215248f901 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/mot.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Scalar GetMOTBoxColor(int idx) { + idx = idx * 3; + cv::Scalar color = + cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255); + return color; +} + +cv::Mat VisMOT(const cv::Mat &img, const MOTResult &results, + float score_threshold, tracking::TrailRecorder *recorder) { + cv::Mat vis_img = img.clone(); + int im_h = img.rows; + int im_w = img.cols; + float text_scale = std::max(1, static_cast(im_w / 1600.)); + float text_thickness = 2.; + float line_thickness = std::max(1, static_cast(im_w / 500.)); + for (int i = 0; i < results.boxes.size(); ++i) { + if (results.scores[i] < score_threshold) { + continue; + } + const int obj_id = results.ids[i]; + const float score = results.scores[i]; + cv::Scalar color = GetMOTBoxColor(obj_id); + if (recorder != nullptr) { + int id = results.ids[i]; + auto iter = recorder->records.find(id); + if (iter != recorder->records.end()) { + for (int j = 0; j < iter->second.size(); j++) { + cv::Point center(iter->second[j][0], iter->second[j][1]); + cv::circle(vis_img, center, text_thickness, color); + } + } + } + cv::Point pt1 = cv::Point(results.boxes[i][0], results.boxes[i][1]); + cv::Point pt2 = cv::Point(results.boxes[i][2], results.boxes[i][3]); + cv::Point id_pt = cv::Point(results.boxes[i][0], results.boxes[i][1] + 10); + cv::Point score_pt = + cv::Point(results.boxes[i][0], results.boxes[i][1] - 10); + cv::rectangle(vis_img, pt1, pt2, color, line_thickness); + std::ostringstream idoss; + idoss << std::setiosflags(std::ios::fixed) << std::setprecision(4); + idoss << obj_id; + std::string id_text = idoss.str(); + + cv::putText(vis_img, id_text, id_pt, cv::FONT_HERSHEY_PLAIN, text_scale, + color, text_thickness); + + std::ostringstream soss; + soss << std::setiosflags(std::ios::fixed) << std::setprecision(2); + soss << score; + std::string score_text = soss.str(); + + cv::putText(vis_img, score_text, score_pt, cv::FONT_HERSHEY_PLAIN, + text_scale, color, text_thickness); + } + return vis_img; +} +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/ocr.cc b/libs/ultrainfer/ultrainfer/vision/visualize/ocr.cc new file mode 100755 index 0000000000..bafd017528 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/ocr.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
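For the tracking visualization above: each track id is mapped to a deterministic color by GetMOTBoxColor, and the box, id, and score are drawn per detection, with an optional trail recorder for motion history. A minimal hypothetical usage sketch follows; it assumes each box is a 4-element (x1, y1, x2, y2) entry, matching how VisMOT indexes it, and the concrete values are placeholders. No trail recorder is passed.

// Illustrative sketch, not part of this patch: visualize one tracked box.
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/visualize/visualize.h"

int main() {
  cv::Mat frame = cv::imread("frame_000001.jpg");   // placeholder input frame
  ultrainfer::vision::MOTResult result;
  result.boxes.push_back({50, 60, 200, 300});       // x1, y1, x2, y2 (assumed layout)
  result.ids.push_back(7);
  result.scores.push_back(0.92f);
  cv::Mat vis = ultrainfer::vision::VisMOT(frame, result,
                                           /*score_threshold=*/0.5f,
                                           /*recorder=*/nullptr);
  cv::imwrite("mot_vis.jpg", vis);
  return 0;
}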
+ +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result, + const float score_threshold) { + auto vis_im = im.clone(); + bool have_score = (ocr_result.boxes.size() == ocr_result.rec_scores.size()); + + for (int n = 0; n < ocr_result.boxes.size(); n++) { + if (have_score) { + if (ocr_result.rec_scores[n] < score_threshold) { + continue; + } + } + cv::Point rook_points[4]; + + for (int m = 0; m < 4; m++) { + rook_points[m] = cv::Point(int(ocr_result.boxes[n][m * 2]), + int(ocr_result.boxes[n][m * 2 + 1])); + } + + const cv::Point *ppt[1] = {rook_points}; + int npt[] = {4}; + cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); + } + + return vis_im; +} + +cv::Mat VisCURVEOcr(const cv::Mat &im, const OCRCURVEResult &ocr_result, + const float score_threshold) { + auto vis_im = im.clone(); + bool have_score = (ocr_result.boxes.size() == ocr_result.rec_scores.size()); + + for (int n = 0; n < ocr_result.boxes.size(); n++) { + if (have_score) { + if (ocr_result.rec_scores[n] < score_threshold) { + continue; + } + } + std::vector rook_points; + + for (int m = 0; m < ocr_result.boxes[n].size() / 2; m++) { + rook_points.push_back(cv::Point(int(ocr_result.boxes[n][m * 2]), + int(ocr_result.boxes[n][m * 2 + 1]))); + } + + if (!rook_points.empty()) { + cv::Point *ppt = &rook_points[0]; + int npt = static_cast(rook_points.size()); + cv::polylines(vis_im, &ppt, &npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); + } + } + + return vis_im; +} + +cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { + FDWARNING + << "DEPRECATED: ultrainfer::vision::Visualize::VisOcr is deprecated, " + "please use ultrainfer::vision:VisOcr function instead." + << std::endl; + auto vis_im = im.clone(); + + for (int n = 0; n < ocr_result.boxes.size(); n++) { + cv::Point rook_points[4]; + + for (int m = 0; m < 4; m++) { + rook_points[m] = cv::Point(int(ocr_result.boxes[n][m * 2]), + int(ocr_result.boxes[n][m * 2 + 1])); + } + + const cv::Point *ppt[1] = {rook_points}; + int npt[] = {4}; + cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); + } + + return vis_im; +} + +cv::Mat Visualize::VisCURVEOcr(const cv::Mat &im, + const OCRCURVEResult &ocr_result) { + FDWARNING + << "DEPRECATED: ultrainfer::vision::Visualize::VisOcr is deprecated, " + "please use ultrainfer::vision:VisOcr function instead." + << std::endl; + auto vis_im = im.clone(); + + for (int n = 0; n < ocr_result.boxes.size(); n++) { + std::vector rook_points; + for (int m = 0; m < ocr_result.boxes[n].size() / 2; m++) { + rook_points.push_back(cv::Point(int(ocr_result.boxes[n][m * 2]), + int(ocr_result.boxes[n][m * 2 + 1]))); + } + if (!rook_points.empty()) { + cv::Point *ppt = &rook_points[0]; + int npt = static_cast(rook_points.size()); + cv::polylines(vis_im, &ppt, &npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); + } + } + + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/perception.cc b/libs/ultrainfer/ultrainfer/vision/visualize/perception.cc new file mode 100755 index 0000000000..8f41786d13 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/perception.cc @@ -0,0 +1,195 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "opencv2/calib3d/calib3d.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" +#include "yaml-cpp/yaml.h" + +namespace ultrainfer { +namespace vision { + +using matrix = std::vector>; + +matrix Multiple(const matrix a, const matrix b) { + const int m = a.size(); // a rows + if (m == 0) { + matrix c; + return c; + } + if (a[0].size() != b.size()) { + FDERROR << "A[m,n] * B[p,q], n must equal to p." << std::endl; + matrix c; + return c; + } + const int n = a[0].size(); // a cols + const int p = b[0].size(); // b cols + matrix c(m, std::vector(p, 0)); + for (auto i = 0; i < m; i++) { + for (auto j = 0; j < p; j++) { + for (auto k = 0; k < n; k++) + c[i][j] += a[i][k] * b[k][j]; + } + } + return c; +} + +cv::Mat VisPerception(const cv::Mat &im, const PerceptionResult &result, + const std::string &config_file, float score_threshold, + int line_size, float font_size) { + if (result.scores.empty()) { + return im; + } + YAML::Node cfg; + try { + cfg = YAML::LoadFile(config_file); + } catch (YAML::BadFile &e) { + FDERROR << "Failed to load yaml file " << config_file + << ", maybe you should check this file." << std::endl; + return im; + } + + std::vector target_size; + for (const auto &op : cfg["Preprocess"]) { + std::string op_name = op["type"].as(); + if (op_name == "Resize") { + target_size = op["target_size"].as>(); + } + } + + std::vector vec_k_data = cfg["k_data"].as>(); + if (vec_k_data.size() != 9) { + FDERROR + << "The K data load from the yaml file: " << config_file + << " is unexpected, the expected size is 9, but the loaded size is: " + << vec_k_data.size() << " ,maybe you should check this file." 
+ << std::endl; + return im; + } + matrix k_data(3, std::vector()); + for (auto j = 0; j < 3; j++) { + k_data[j].insert(k_data[j].begin(), vec_k_data.begin() + j * 3, + vec_k_data.begin() + j * 3 + 3); + } + + std::vector rvec = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0}; + std::vector tvec = {0, 0, 0}; + + matrix connect_line_id = {{1, 0}, {2, 7}, {3, 6}, {4, 5}, {1, 2}, {2, 3}, + {3, 4}, {4, 1}, {0, 7}, {7, 6}, {6, 5}, {5, 0}}; + + int max_label_id = + *std::max_element(result.label_ids.begin(), result.label_ids.end()); + std::vector color_map = GenerateColorMap(max_label_id); + int h = im.rows; + int w = im.cols; + cv::Mat vis_im = im.clone(); + cv::resize(im, vis_im, cv::Size(target_size[1], target_size[0]), 0, 0, 0); + for (size_t i = 0; i < result.scores.size(); ++i) { + if (result.scores[i] < 0.5) { + continue; + } + float h = result.boxes[i][4]; + float w = result.boxes[i][5]; + float l = result.boxes[i][6]; + + float x = result.center[i][0]; + float y = result.center[i][1]; + float z = result.center[i][2]; + std::vector x_corners = {0, l, l, l, l, 0, 0, 0}; + std::vector y_corners = {0, 0, h, h, 0, 0, h, h}; + std::vector z_corners = {0, 0, 0, w, w, w, w, 0}; + + for (auto j = 0; j < x_corners.size(); j++) { + x_corners[j] = x_corners[j] - l / 2; + y_corners[j] = y_corners[j] - h; + z_corners[j] = z_corners[j] - w / 2; + } + + matrix corners_3d = {x_corners, y_corners, z_corners}; + + float ry = result.yaw_angle[i]; + matrix rot_mat = { + {cosf(ry), 0, sinf(ry)}, {0, 1, 0}, {sinf(ry), 0, cosf(ry)}}; + + matrix rot_corners_3d = Multiple(rot_mat, corners_3d); + + for (auto j = 0; j < rot_corners_3d[0].size(); j++) { + rot_corners_3d[0][j] += x; + rot_corners_3d[1][j] += y; + rot_corners_3d[2][j] += z; + } + + auto corners_2d = Multiple(k_data, rot_corners_3d); + + for (auto j = 0; j < corners_2d[0].size(); j++) { + corners_2d[0][j] /= corners_2d[2][j]; + corners_2d[1][j] /= corners_2d[2][j]; + } + + std::vector box2d = { + *std::min_element(corners_2d[0].begin(), corners_2d[0].end()), + *std::min_element(corners_2d[1].begin(), corners_2d[1].end()), + *std::max_element(corners_2d[0].begin(), corners_2d[0].end()), + *std::max_element(corners_2d[1].begin(), corners_2d[1].end())}; + + if (box2d[0] == 0 && box2d[1] == 0 && box2d[2] == 0 && box2d[3] == 0) { + continue; + } + + std::vector points3d; + for (auto j = 0; j < rot_corners_3d[0].size(); j++) { + points3d.push_back(cv::Point3f(rot_corners_3d[0][j], rot_corners_3d[1][j], + rot_corners_3d[2][j])); + } + cv::Mat rVec(3, 3, cv::DataType::type, rvec.data()); + cv::Mat tVec(3, 1, cv::DataType::type, tvec.data()); + std::vector vec_k; + for (auto &&v : k_data) { + vec_k.insert(vec_k.end(), v.begin(), v.end()); + } + cv::Mat intrinsicMat(3, 3, cv::DataType::type, vec_k.data()); + cv::Mat distCoeffs(5, 1, cv::DataType::type); + std::vector projectedPoints; + cv::projectPoints(points3d, rVec, tVec, intrinsicMat, distCoeffs, + projectedPoints); + + int c0 = color_map[3 * result.label_ids[i] + 0]; + int c1 = color_map[3 * result.label_ids[i] + 1]; + int c2 = color_map[3 * result.label_ids[i] + 2]; + cv::Scalar color = cv::Scalar(c0, c1, c2); + for (auto id = 0; id < connect_line_id.size(); id++) { + int p1 = connect_line_id[id][0]; + int p2 = connect_line_id[id][1]; + cv::line(vis_im, projectedPoints[p1], projectedPoints[p2], color, 1); + } + int font = cv::FONT_HERSHEY_SIMPLEX; + std::string score = std::to_string(result.scores[i]); + if (score.size() > 4) { + score = score.substr(0, 4); + } + std::string text = 
std::to_string(result.label_ids[i]) + ", " + score; + cv::Point2f original; + original.x = box2d[0]; + original.y = box2d[1]; + cv::putText(vis_im, text, original, font, font_size, + cv::Scalar(255, 255, 255), 1); + } + return vis_im; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/remove_small_connnected_area.cc b/libs/ultrainfer/ultrainfer/vision/visualize/remove_small_connnected_area.cc new file mode 100755 index 0000000000..5b5e55c38b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/remove_small_connnected_area.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat RemoveSmallConnectedArea(const cv::Mat &alpha_pred, float threshold) { + cv::Mat gray, binary; + alpha_pred.convertTo(gray, CV_8UC1, 255.f); + cv::Mat alpha_pred_clone = alpha_pred.clone(); + // 255 * 0.05 ~ 13 + unsigned int binary_threshold = static_cast(255.f * threshold); + cv::threshold(gray, binary, binary_threshold, 255, cv::THRESH_BINARY); + // morphologyEx with OPEN operation to remove noise first. + auto kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(3, 3), + cv::Point(-1, -1)); + cv::morphologyEx(binary, binary, cv::MORPH_OPEN, kernel); + // Computationally connected domain + cv::Mat labels = cv::Mat::zeros(alpha_pred_clone.size(), CV_32S); + cv::Mat stats, centroids; + int num_labels = + cv::connectedComponentsWithStats(binary, labels, stats, centroids, 8, 4); + if (num_labels <= 1) { + // no noise, skip. + return alpha_pred; + } + // find max connected area, 0 is background + int max_connected_id = 1; // 1,2,... + int max_connected_area = stats.at(max_connected_id, cv::CC_STAT_AREA); + for (int i = 1; i < num_labels; ++i) { + int tmp_connected_area = stats.at(i, cv::CC_STAT_AREA); + if (tmp_connected_area > max_connected_area) { + max_connected_area = tmp_connected_area; + max_connected_id = i; + } + } + const int h = alpha_pred_clone.rows; + const int w = alpha_pred_clone.cols; + // remove small connected area. + for (int i = 0; i < h; ++i) { + int *label_row_ptr = labels.ptr(i); + float *alpha_row_ptr = alpha_pred_clone.ptr(i); + for (int j = 0; j < w; ++j) { + if (label_row_ptr[j] != max_connected_id) + alpha_row_ptr[j] = 0.f; + } + } + return alpha_pred_clone; +} + +cv::Mat Visualize::RemoveSmallConnectedArea(const cv::Mat &alpha_pred, + float threshold) { + cv::Mat gray, binary; + alpha_pred.convertTo(gray, CV_8UC1, 255.f); + cv::Mat alpha_pred_clone = alpha_pred.clone(); + // 255 * 0.05 ~ 13 + unsigned int binary_threshold = static_cast(255.f * threshold); + cv::threshold(gray, binary, binary_threshold, 255, cv::THRESH_BINARY); + // morphologyEx with OPEN operation to remove noise first. 
+ auto kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(3, 3), + cv::Point(-1, -1)); + cv::morphologyEx(binary, binary, cv::MORPH_OPEN, kernel); + // Computationally connected domain + cv::Mat labels = cv::Mat::zeros(alpha_pred_clone.size(), CV_32S); + cv::Mat stats, centroids; + int num_labels = + cv::connectedComponentsWithStats(binary, labels, stats, centroids, 8, 4); + if (num_labels <= 1) { + // no noise, skip. + return alpha_pred; + } + // find max connected area, 0 is background + int max_connected_id = 1; // 1,2,... + int max_connected_area = stats.at(max_connected_id, cv::CC_STAT_AREA); + for (int i = 1; i < num_labels; ++i) { + int tmp_connected_area = stats.at(i, cv::CC_STAT_AREA); + if (tmp_connected_area > max_connected_area) { + max_connected_area = tmp_connected_area; + max_connected_id = i; + } + } + const int h = alpha_pred_clone.rows; + const int w = alpha_pred_clone.cols; + // remove small connected area. + for (int i = 0; i < h; ++i) { + int *label_row_ptr = labels.ptr(i); + float *alpha_row_ptr = alpha_pred_clone.ptr(i); + for (int j = 0; j < w; ++j) { + if (label_row_ptr[j] != max_connected_id) + alpha_row_ptr[j] = 0.f; + } + } + return alpha_pred_clone; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/segmentation.cc b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation.cc new file mode 100755 index 0000000000..fc4425816f --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation.cc @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/visualize/segmentation_arm.h" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +static cv::Mat VisSegmentationCommonCpu(const cv::Mat &im, + const SegmentationResult &result, + float weight) { + // Use the native c++ version without any optimization. 
+ auto color_map = GenerateColorMap(1000); + int64_t height = result.shape[0]; + int64_t width = result.shape[1]; + auto vis_img = cv::Mat(height, width, CV_8UC3); + + int64_t index = 0; + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int category_id = result.label_map[index++]; + if (category_id == 0) { + vis_img.at(i, j)[0] = im.at(i, j)[0]; + vis_img.at(i, j)[1] = im.at(i, j)[1]; + vis_img.at(i, j)[2] = im.at(i, j)[2]; + } else { + vis_img.at(i, j)[0] = color_map[3 * category_id + 0]; + vis_img.at(i, j)[1] = color_map[3 * category_id + 1]; + vis_img.at(i, j)[2] = color_map[3 * category_id + 2]; + } + } + } + cv::addWeighted(im, 1.0 - weight, vis_img, weight, 0, vis_img); + return vis_img; +} + +cv::Mat VisSegmentation(const cv::Mat &im, const SegmentationResult &result, + float weight) { + // TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON + return VisSegmentationNEON(im, result, weight, true); +#else + return VisSegmentationCommonCpu(im, result, weight); +#endif +} + +cv::Mat Visualize::VisSegmentation(const cv::Mat &im, + const SegmentationResult &result) { + FDWARNING << "DEPRECATED: ultrainfer::vision::Visualize::VisSegmentation is " + "deprecated, please use ultrainfer::vision:VisSegmentation " + "function instead." + << std::endl; +#ifdef __ARM_NEON + return VisSegmentationNEON(im, result, 0.5f, true); +#else + return VisSegmentationCommonCpu(im, result, 0.5f); +#endif +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.cc b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.cc new file mode 100755 index 0000000000..9d2defa74b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.cc @@ -0,0 +1,177 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/visualize/segmentation_arm.h" +#ifdef __ARM_NEON +#include +#endif + +namespace ultrainfer { +namespace vision { + +static constexpr int _OMP_THREADS = 2; + +static inline void QuantizeBlendingWeight8(float weight, + uint8_t *old_multi_factor, + uint8_t *new_multi_factor) { + // Quantize the weight to boost blending performance. + // if 0.0 < w <= 1/8, w ~ 1/8=1/(2^3) shift right 3 mul 1, 7 + // if 1/8 < w <= 2/8, w ~ 2/8=1/(2^3) shift right 3 mul 2, 6 + // if 2/8 < w <= 3/8, w ~ 3/8=1/(2^3) shift right 3 mul 3, 5 + // if 3/8 < w <= 4/8, w ~ 4/8=1/(2^3) shift right 3 mul 4, 4 + // Shift factor is always 3, but the mul factor is different. + // Moving 7 bits to the right tends to result in a zero value, + // So, We choose to shift 3 bits to get an approximation. 
+ uint8_t weight_quantize = static_cast(weight * 8.0f); + *new_multi_factor = weight_quantize; + *old_multi_factor = (8 - weight_quantize); +} + +cv::Mat VisSegmentationNEON(const cv::Mat &im, const SegmentationResult &result, + float weight, bool quantize_weight) { +#ifndef __ARM_NEON + FDASSERT(false, "UltraInfer was not compiled with Arm NEON support!") +#else + int64_t height = result.shape[0]; + int64_t width = result.shape[1]; + auto vis_img = cv::Mat(height, width, CV_8UC3); + + int32_t size = static_cast(height * width); + uint8_t *vis_ptr = static_cast(vis_img.data); + const uint8_t *label_ptr = + static_cast(result.label_map.data()); + const uint8_t *im_ptr = static_cast(im.data); + + if (!quantize_weight) { + uint8x16_t zerox16 = vdupq_n_u8(0); +#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) + for (int i = 0; i < size - 15; i += 16) { + uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3); // 48 bytes + uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes + uint8x16_t ibx16 = bgrx16x3.val[0]; + uint8x16_t igx16 = bgrx16x3.val[1]; + uint8x16_t irx16 = bgrx16x3.val[2]; + // e.g 0b00000001 << 7 -> 0b10000000 128; + uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); + uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); + uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); + uint8x16x3_t vbgrx16x3; + // Keep the pixels of input im if mask = 0 + uint8x16_t cezx16 = vceqq_u8(labelx16, zerox16); + vbgrx16x3.val[0] = vorrq_u8(vandq_u8(cezx16, ibx16), mbx16); + vbgrx16x3.val[1] = vorrq_u8(vandq_u8(cezx16, igx16), mgx16); + vbgrx16x3.val[2] = vorrq_u8(vandq_u8(cezx16, irx16), mrx16); + vst3q_u8(vis_ptr + i * 3, vbgrx16x3); + } + for (int i = size - 15; i < size; i++) { + uint8_t label = label_ptr[i]; + vis_ptr[i * 3 + 0] = (label << 7); + vis_ptr[i * 3 + 1] = (label << 4); + vis_ptr[i * 3 + 2] = (label << 3); + } + // Blend the colors use OpenCV + cv::addWeighted(im, 1.0 - weight, vis_img, weight, 0, vis_img); + return vis_img; + } + + // Quantize the weight to boost blending performance. + // After that, we can directly use shift instructions + // to blend the colors from input im and mask. Please + // check QuantizeBlendingWeight8 for more details. + uint8_t old_multi_factor, new_multi_factor; + QuantizeBlendingWeight8(weight, &old_multi_factor, &new_multi_factor); + if (new_multi_factor == 0) { + return im; // Only keep origin image. + } + + if (new_multi_factor == 8) { +// Only keep mask, no need to blending with origin image. +#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) + for (int i = 0; i < size - 15; i += 16) { + uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes + // e.g 0b00000001 << 7 -> 0b10000000 128; + uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); + uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); + uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); + uint8x16x3_t vbgr16x3; + vbgr16x3.val[0] = mbx16; + vbgr16x3.val[1] = mgx16; + vbgr16x3.val[2] = mrx16; + vst3q_u8(vis_ptr + i * 3, vbgr16x3); + } + for (int i = size - 15; i < size; i++) { + uint8_t label = label_ptr[i]; + vis_ptr[i * 3 + 0] = (label << 7); + vis_ptr[i * 3 + 1] = (label << 4); + vis_ptr[i * 3 + 2] = (label << 3); + } + return vis_img; + } + + uint8x16_t zerox16 = vdupq_n_u8(0); + uint8x16_t old_fx16 = vdupq_n_u8(old_multi_factor); + uint8x16_t new_fx16 = vdupq_n_u8(new_multi_factor); +// Blend the two colors together with quantize 'weight'. 
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) + for (int i = 0; i < size - 15; i += 16) { + uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3); // 48 bytes + uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes + uint8x16_t ibx16 = bgrx16x3.val[0]; + uint8x16_t igx16 = bgrx16x3.val[1]; + uint8x16_t irx16 = bgrx16x3.val[2]; + // e.g 0b00000001 << 7 -> 0b10000000 128; + uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); + uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); + uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); + // Moving 7 bits to the right tends to result in zero, + // So, We choose to shift 3 bits to get an approximation + uint8x16_t ibx16_mshr = vmulq_u8(vshrq_n_u8(ibx16, 3), old_fx16); + uint8x16_t igx16_mshr = vmulq_u8(vshrq_n_u8(igx16, 3), old_fx16); + uint8x16_t irx16_mshr = vmulq_u8(vshrq_n_u8(irx16, 3), old_fx16); + uint8x16_t mbx16_mshr = vmulq_u8(vshrq_n_u8(mbx16, 3), new_fx16); + uint8x16_t mgx16_mshr = vmulq_u8(vshrq_n_u8(mgx16, 3), new_fx16); + uint8x16_t mrx16_mshr = vmulq_u8(vshrq_n_u8(mrx16, 3), new_fx16); + uint8x16_t qbx16 = vqaddq_u8(ibx16_mshr, mbx16_mshr); + uint8x16_t qgx16 = vqaddq_u8(igx16_mshr, mgx16_mshr); + uint8x16_t qrx16 = vqaddq_u8(irx16_mshr, mrx16_mshr); + // Keep the pixels of input im if label = 0 (means mask = 0) + uint8x16_t cezx16 = vceqq_u8(labelx16, zerox16); + uint8x16_t abx16 = vandq_u8(cezx16, ibx16); + uint8x16_t agx16 = vandq_u8(cezx16, igx16); + uint8x16_t arx16 = vandq_u8(cezx16, irx16); + uint8x16x3_t vbgr16x3; + // Reset qx values to 0 if label is 0, then, keep mask values + // if label is not 0 + uint8x16_t ncezx16 = vmvnq_u8(cezx16); + vbgr16x3.val[0] = vorrq_u8(abx16, vandq_u8(ncezx16, qbx16)); + vbgr16x3.val[1] = vorrq_u8(agx16, vandq_u8(ncezx16, qgx16)); + vbgr16x3.val[2] = vorrq_u8(arx16, vandq_u8(ncezx16, qrx16)); + // Store the blended pixels to vis img + vst3q_u8(vis_ptr + i * 3, vbgr16x3); + } + for (int i = size - 15; i < size; i++) { + uint8_t label = label_ptr[i]; + vis_ptr[i * 3 + 0] = (im_ptr[i * 3 + 0] >> 3) * old_multi_factor + + ((label << 7) >> 3) * new_multi_factor; + vis_ptr[i * 3 + 1] = (im_ptr[i * 3 + 1] >> 3) * old_multi_factor + + ((label << 4) >> 3) * new_multi_factor; + vis_ptr[i * 3 + 2] = (im_ptr[i * 3 + 2] >> 3) * old_multi_factor + + ((label << 3) >> 3) * new_multi_factor; + } + return vis_img; +#endif +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.h b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.h new file mode 100755 index 0000000000..5e82c7859e --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/segmentation_arm.h @@ -0,0 +1,27 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
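The NEON segmentation path above quantizes the blending weight to a multiple of 1/8 so the per-channel blend becomes shifts and small integer multiplies instead of float math, exactly as the scalar tail of the loop does. A standalone scalar sketch of that approximation (function name and example values are illustrative):

// Illustrative scalar equivalent of the quantized blend used above:
// weight is truncated to n/8, then each channel becomes
// (pixel >> 3) * (8 - n) + (mask_color >> 3) * n, roughly pixel*(1-w) + mask*w.
#include <cstdint>
#include <cstdio>

static uint8_t BlendQuantized(uint8_t im_val, uint8_t mask_val, float weight) {
  uint8_t n = static_cast<uint8_t>(weight * 8.0f);  // new_multi_factor
  uint8_t o = static_cast<uint8_t>(8 - n);          // old_multi_factor
  return static_cast<uint8_t>((im_val >> 3) * o + (mask_val >> 3) * n);
}

int main() {
  // With weight = 0.5 both factors are 4; here the result equals the exact
  // average (132). In general, error comes from the weight truncation and
  // the >> 3 truncation of each operand.
  std::printf("%d\n", BlendQuantized(200, 64, 0.5f));
  return 0;
}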
+ +#pragma once + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat VisSegmentationNEON(const cv::Mat &im, const SegmentationResult &result, + float weight, bool quantize_weight = true); + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/swap_background.cc b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background.cc new file mode 100755 index 0000000000..2f05c302a0 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background.cc @@ -0,0 +1,180 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/utils/utils.h" +#include "ultrainfer/vision/visualize/swap_background_arm.h" +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +static cv::Mat SwapBackgroundCommonCpu(const cv::Mat &im, + const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area) { + FDASSERT((!im.empty()), "Image can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels image mat!"); + FDASSERT((!background.empty()), "Background image can't be empty!"); + FDASSERT((background.channels() == 3), + "Only support 3 channels background image mat!"); + auto vis_img = im.clone(); + auto background_copy = background.clone(); + int out_h = static_cast(result.shape[0]); + int out_w = static_cast(result.shape[1]); + int height = im.rows; + int width = im.cols; + int bg_height = background.rows; + int bg_width = background.cols; + std::vector alpha_copy; + alpha_copy.assign(result.alpha.begin(), result.alpha.end()); + float *alpha_ptr = static_cast(alpha_copy.data()); + cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr); + if (remove_small_connected_area) { + alpha = Visualize::RemoveSmallConnectedArea(alpha, 0.05f); + } + if ((vis_img).type() != CV_8UC3) { + (vis_img).convertTo((vis_img), CV_8UC3); + } + if ((background_copy).type() != CV_8UC3) { + (background_copy).convertTo((background_copy), CV_8UC3); + } + if ((bg_height != height) || (bg_width != width)) { + cv::resize(background, background_copy, cv::Size(width, height)); + } + if ((out_h != height) || (out_w != width)) { + cv::resize(alpha, alpha, cv::Size(width, height)); + } + uchar *vis_data = static_cast(vis_img.data); + uchar *background_data = static_cast(background_copy.data); + uchar *im_data = static_cast(im.data); + float *alpha_data = reinterpret_cast(alpha.data); + + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + float alpha_val = alpha_data[i * width + j]; + for (size_t c = 0; c < 3; ++c) { + vis_data[i * width * 3 + j * 3 + c] = cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + c]) * alpha_val + + (1.f - alpha_val) * background_data[i * width * 3 + j * 3 + c]); + } + } + } + + return 
vis_img; +} + +static cv::Mat SwapBackgroundCommonCpu(const cv::Mat &im, + const cv::Mat &background, + const SegmentationResult &result, + int background_label) { + FDASSERT((!im.empty()), "Image can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels image mat!"); + FDASSERT((!background.empty()), "Background image can't be empty!"); + FDASSERT((background.channels() == 3), + "Only support 3 channels background image mat!"); + auto vis_img = im.clone(); + auto background_copy = background.clone(); + int height = im.rows; + int width = im.cols; + int bg_height = background.rows; + int bg_width = background.cols; + if ((vis_img).type() != CV_8UC3) { + (vis_img).convertTo((vis_img), CV_8UC3); + } + if ((background_copy).type() != CV_8UC3) { + (background_copy).convertTo((background_copy), CV_8UC3); + } + if ((bg_height != height) || (bg_width != width)) { + cv::resize(background, background_copy, cv::Size(width, height)); + } + uchar *vis_data = static_cast(vis_img.data); + uchar *background_data = static_cast(background_copy.data); + uchar *im_data = static_cast(im.data); + float keep_value = 0.f; + + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + int category_id = result.label_map[i * width + j]; + if (background_label != category_id) { + keep_value = 1.0f; + } else { + keep_value = 0.f; + } + for (size_t c = 0; c < 3; ++c) { + vis_data[i * width * 3 + j * 3 + c] = cv::saturate_cast( + static_cast(im_data[i * width * 3 + j * 3 + c]) * + keep_value + + (1.f - keep_value) * background_data[i * width * 3 + j * 3 + c]); + } + } + } + + return vis_img; +} + +// Public interfaces for SwapBackground. +cv::Mat SwapBackground(const cv::Mat &im, const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area) { + // TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON + return SwapBackgroundNEON(im, background, result, + remove_small_connected_area); +#else + return SwapBackgroundCommonCpu(im, background, result, + remove_small_connected_area); +#endif +} + +cv::Mat SwapBackground(const cv::Mat &im, const cv::Mat &background, + const SegmentationResult &result, int background_label) { + // TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON + // return SwapBackgroundNEON(im, background, result, background_label); + return SwapBackgroundNEON(im, background, result, background_label); +#else + return SwapBackgroundCommonCpu(im, background, result, background_label); +#endif +} + +// DEPRECATED +cv::Mat Visualize::SwapBackgroundMatting(const cv::Mat &im, + const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area) { +// TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON + return SwapBackgroundNEON(im, background, result, + remove_small_connected_area); +#else + return SwapBackgroundCommonCpu(im, background, result, + remove_small_connected_area); +#endif +} + +cv::Mat Visualize::SwapBackgroundSegmentation( + const cv::Mat &im, const cv::Mat &background, int background_label, + const SegmentationResult &result) { + // TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON + return SwapBackgroundNEON(im, background, result, background_label); +#else + return SwapBackgroundCommonCpu(im, background, result, background_label); +#endif +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.cc b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.cc new file mode 100755 
index 0000000000..1382ef6fcb --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.cc @@ -0,0 +1,238 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/visualize/swap_background_arm.h" + +#include "ultrainfer/vision/visualize/visualize.h" +#ifdef __ARM_NEON +#include +#endif +#include "ultrainfer/utils/utils.h" + +namespace ultrainfer { +namespace vision { + +static constexpr int _OMP_THREADS = 2; + +cv::Mat SwapBackgroundNEON(const cv::Mat &im, const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area) { +#ifndef __ARM_NEON + FDASSERT(false, "UltraInfer was not compiled with Arm NEON support!"); +#else + FDASSERT((!im.empty()), "Image can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels image mat!"); + FDASSERT((!background.empty()), "Background image can't be empty!"); + FDASSERT((background.channels() == 3), + "Only support 3 channels background image mat!"); + int out_h = static_cast(result.shape[0]); + int out_w = static_cast(result.shape[1]); + int height = im.rows; + int width = im.cols; + int bg_height = background.rows; + int bg_width = background.cols; + + // WARN: may change the original alpha + float *alpha_ptr = const_cast(result.alpha.data()); + + cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr); + if (remove_small_connected_area) { + alpha = Visualize::RemoveSmallConnectedArea(alpha, 0.05f); + } + auto vis_img = cv::Mat(height, width, CV_8UC3); + + cv::Mat background_ref; + if ((bg_height != height) || (bg_width != width)) { + cv::resize(background, background_ref, cv::Size(width, height)); + } else { + background_ref = background; // ref only + } + if ((background_ref).type() != CV_8UC3) { + (background_ref).convertTo((background_ref), CV_8UC3); + } + + if ((out_h != height) || (out_w != width)) { + cv::resize(alpha, alpha, cv::Size(width, height)); + } + + uint8_t *vis_data = static_cast(vis_img.data); + const uint8_t *background_data = + static_cast(background_ref.data); + const uint8_t *im_data = static_cast(im.data); + const float *alpha_data = reinterpret_cast(alpha.data); + + const int32_t size = static_cast(height * width); +#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) + for (int i = 0; i < size - 7; i += 8) { + uint8x8x3_t ibgrx8x3 = vld3_u8(im_data + i * 3); // 24 bytes + // u8 -> u16 -> u32 -> f32 + uint16x8_t ibx8 = vmovl_u8(ibgrx8x3.val[0]); + uint16x8_t igx8 = vmovl_u8(ibgrx8x3.val[1]); + uint16x8_t irx8 = vmovl_u8(ibgrx8x3.val[2]); + uint8x8x3_t bbgrx8x3 = vld3_u8(background_data + i * 3); // 24 bytes + uint16x8_t bbx8 = vmovl_u8(bbgrx8x3.val[0]); + uint16x8_t bgx8 = vmovl_u8(bbgrx8x3.val[1]); + uint16x8_t brx8 = vmovl_u8(bbgrx8x3.val[2]); + + uint32x4_t hibx4 = vmovl_u16(vget_high_u16(ibx8)); + uint32x4_t higx4 = vmovl_u16(vget_high_u16(igx8)); + uint32x4_t hirx4 = vmovl_u16(vget_high_u16(irx8)); + uint32x4_t libx4 = 
vmovl_u16(vget_low_u16(ibx8)); + uint32x4_t ligx4 = vmovl_u16(vget_low_u16(igx8)); + uint32x4_t lirx4 = vmovl_u16(vget_low_u16(irx8)); + + uint32x4_t hbbx4 = vmovl_u16(vget_high_u16(bbx8)); + uint32x4_t hbgx4 = vmovl_u16(vget_high_u16(bgx8)); + uint32x4_t hbrx4 = vmovl_u16(vget_high_u16(brx8)); + uint32x4_t lbbx4 = vmovl_u16(vget_low_u16(bbx8)); + uint32x4_t lbgx4 = vmovl_u16(vget_low_u16(bgx8)); + uint32x4_t lbrx4 = vmovl_u16(vget_low_u16(brx8)); + + float32x4_t fhibx4 = vcvtq_f32_u32(hibx4); + float32x4_t fhigx4 = vcvtq_f32_u32(higx4); + float32x4_t fhirx4 = vcvtq_f32_u32(hirx4); + float32x4_t flibx4 = vcvtq_f32_u32(libx4); + float32x4_t fligx4 = vcvtq_f32_u32(ligx4); + float32x4_t flirx4 = vcvtq_f32_u32(lirx4); + + float32x4_t fhbbx4 = vcvtq_f32_u32(hbbx4); + float32x4_t fhbgx4 = vcvtq_f32_u32(hbgx4); + float32x4_t fhbrx4 = vcvtq_f32_u32(hbrx4); + float32x4_t flbbx4 = vcvtq_f32_u32(lbbx4); + float32x4_t flbgx4 = vcvtq_f32_u32(lbgx4); + float32x4_t flbrx4 = vcvtq_f32_u32(lbrx4); + + // alpha load from little end + float32x4_t lalpx4 = vld1q_f32(alpha_data + i); // low bits + float32x4_t halpx4 = vld1q_f32(alpha_data + i + 4); // high bits + float32x4_t rlalpx4 = vsubq_f32(vdupq_n_f32(1.0f), lalpx4); + float32x4_t rhalpx4 = vsubq_f32(vdupq_n_f32(1.0f), halpx4); + + // blending + float32x4_t fhvbx4 = + vaddq_f32(vmulq_f32(fhibx4, halpx4), vmulq_f32(fhbbx4, rhalpx4)); + float32x4_t fhvgx4 = + vaddq_f32(vmulq_f32(fhigx4, halpx4), vmulq_f32(fhbgx4, rhalpx4)); + float32x4_t fhvrx4 = + vaddq_f32(vmulq_f32(fhirx4, halpx4), vmulq_f32(fhbrx4, rhalpx4)); + float32x4_t flvbx4 = + vaddq_f32(vmulq_f32(flibx4, lalpx4), vmulq_f32(flbbx4, rlalpx4)); + float32x4_t flvgx4 = + vaddq_f32(vmulq_f32(fligx4, lalpx4), vmulq_f32(flbgx4, rlalpx4)); + float32x4_t flvrx4 = + vaddq_f32(vmulq_f32(flirx4, lalpx4), vmulq_f32(flbrx4, rlalpx4)); + + // f32 -> u32 -> u16 -> u8 + uint8x8x3_t vbgrx8x3; + // combine low 64 bits and high 64 bits into one 128 neon register + vbgrx8x3.val[0] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvbx4)), + vmovn_u32(vcvtq_u32_f32(fhvbx4)))); + vbgrx8x3.val[1] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvgx4)), + vmovn_u32(vcvtq_u32_f32(fhvgx4)))); + vbgrx8x3.val[2] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvrx4)), + vmovn_u32(vcvtq_u32_f32(fhvrx4)))); + vst3_u8(vis_data + i * 3, vbgrx8x3); + } + + for (int i = size - 7; i < size; i++) { + float alp = alpha_data[i]; + for (int c = 0; c < 3; ++c) { + vis_data[i * 3 + 0] = cv::saturate_cast( + static_cast(im_data[i * 3 + c]) * alp + + (1.0f - alp) * static_cast(background_data[i * 3 + c])); + } + } + + return vis_img; +#endif +} + +cv::Mat SwapBackgroundNEON(const cv::Mat &im, const cv::Mat &background, + const SegmentationResult &result, + int background_label) { +#ifndef __ARM_NEON + FDASSERT(false, "UltraInfer was not compiled with Arm NEON support!") +#else + FDASSERT((!im.empty()), "Image can't be empty!"); + FDASSERT((im.channels() == 3), "Only support 3 channels image mat!"); + FDASSERT((!background.empty()), "Background image can't be empty!"); + FDASSERT((background.channels() == 3), + "Only support 3 channels background image mat!"); + int out_h = static_cast(result.shape[0]); + int out_w = static_cast(result.shape[1]); + int height = im.rows; + int width = im.cols; + int bg_height = background.rows; + int bg_width = background.cols; + auto vis_img = cv::Mat(height, width, CV_8UC3); + + cv::Mat background_ref; + if ((bg_height != height) || (bg_width != width)) { + cv::resize(background, background_ref, 
cv::Size(width, height)); + } else { + background_ref = background; // ref only + } + if ((background_ref).type() != CV_8UC3) { + (background_ref).convertTo((background_ref), CV_8UC3); + } + + uint8_t *vis_data = static_cast(vis_img.data); + const uint8_t *background_data = + static_cast(background_ref.data); + const uint8_t *im_data = static_cast(im.data); + const uint8_t *label_data = + static_cast(result.label_map.data()); + + const uint8_t background_label_ = static_cast(background_label); + const int32_t size = static_cast(height * width); + + uint8x16_t backgroundx16 = vdupq_n_u8(background_label_); +#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) + for (int i = 0; i < size - 15; i += 16) { + uint8x16x3_t ibgr16x3 = vld3q_u8(im_data + i * 3); // 48 bytes + uint8x16x3_t bbgr16x3 = vld3q_u8(background_data + i * 3); + uint8x16_t labelx16 = vld1q_u8(label_data + i); // 16 bytes + // Set mask bit = 1 if label != background_label + uint8x16_t nkeepx16 = vceqq_u8(labelx16, backgroundx16); + uint8x16_t keepx16 = vmvnq_u8(nkeepx16); // keep_value = 1 + uint8x16x3_t vbgr16x3; + vbgr16x3.val[0] = vorrq_u8(vandq_u8(ibgr16x3.val[0], keepx16), + vandq_u8(bbgr16x3.val[0], nkeepx16)); + vbgr16x3.val[1] = vorrq_u8(vandq_u8(ibgr16x3.val[1], keepx16), + vandq_u8(bbgr16x3.val[1], nkeepx16)); + vbgr16x3.val[2] = vorrq_u8(vandq_u8(ibgr16x3.val[2], keepx16), + vandq_u8(bbgr16x3.val[2], nkeepx16)); + // Store the blended pixels to vis img + vst3q_u8(vis_data + i * 3, vbgr16x3); + } + + for (int i = size - 15; i < size; i++) { + uint8_t label = label_data[i]; + if (label != background_label_) { + vis_data[i * 3 + 0] = im_data[i * 3 + 0]; + vis_data[i * 3 + 1] = im_data[i * 3 + 1]; + vis_data[i * 3 + 2] = im_data[i * 3 + 2]; + } else { + vis_data[i * 3 + 0] = background_data[i * 3 + 0]; + vis_data[i * 3 + 1] = background_data[i * 3 + 1]; + vis_data[i * 3 + 2] = background_data[i * 3 + 2]; + } + } + + return vis_img; +#endif +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.h b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.h new file mode 100755 index 0000000000..5f614281c2 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/swap_background_arm.h @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
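Both NEON kernels above are reached through the public SwapBackground overloads declared earlier in this patch. A minimal hypothetical usage of the segmentation overload follows; the SegmentationResult fields (shape, label_map) are filled with dummy all-background values purely so the call is well-formed, whereas in practice they come from a segmentation model, and the file names and label value are placeholders.

// Illustrative sketch, not part of this patch: replace background pixels
// (label 0) of a segmentation result with pixels from another image.
#include <opencv2/opencv.hpp>
#include "ultrainfer/vision/visualize/visualize.h"

int main() {
  cv::Mat im = cv::imread("portrait.jpg");        // placeholder input image
  cv::Mat background = cv::imread("beach.jpg");   // placeholder background image
  ultrainfer::vision::SegmentationResult result;  // normally produced by a model
  result.shape = {im.rows, im.cols};              // assumed field layout
  result.label_map.assign(static_cast<size_t>(im.rows) * im.cols, 0);  // all background
  cv::Mat vis = ultrainfer::vision::SwapBackground(im, background, result,
                                                   /*background_label=*/0);
  cv::imwrite("swapped.jpg", vis);                // here: the resized background
  return 0;
}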
+ +#pragma once + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/common/result.h" + +namespace ultrainfer { +namespace vision { + +cv::Mat SwapBackgroundNEON(const cv::Mat &im, const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area = false); + +cv::Mat SwapBackgroundNEON(const cv::Mat &im, const cv::Mat &background, + const SegmentationResult &result, + int background_label); + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/visualize.cc b/libs/ultrainfer/ultrainfer/vision/visualize/visualize.cc new file mode 100755 index 0000000000..5e1da407c9 --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/visualize.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ultrainfer/vision/visualize/visualize.h" + +namespace ultrainfer { +namespace vision { + +static std::vector global_fd_vis_color_map = std::vector(); + +std::vector GenerateColorMap(int num_classes) { + if (num_classes < 10) { + num_classes = 10; + } + std::vector color_map(num_classes * 3, 0); + for (int i = 0; i < num_classes; ++i) { + int j = 0; + int lab = i; + while (lab) { + color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)); + color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)); + color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)); + ++j; + lab >>= 3; + } + } + return color_map; +} + +// This class will deprecated, please not use it +int Visualize::num_classes_ = 0; +std::vector Visualize::color_map_ = std::vector(); + +const std::vector &Visualize::GetColorMap(int num_classes) { + if (num_classes < num_classes_) { + return color_map_; + } + num_classes_ = num_classes; + std::vector().swap(color_map_); + color_map_.resize(3 * num_classes_, 0); + for (int i = 0; i < num_classes_; ++i) { + int j = 0; + int lab = i; + while (lab) { + color_map_[i * 3] |= (((lab >> 0) & 1) << (7 - j)); + color_map_[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)); + color_map_[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)); + ++j; + lab >>= 3; + } + } + return color_map_; +} + +} // namespace vision +} // namespace ultrainfer diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/visualize.h b/libs/ultrainfer/ultrainfer/vision/visualize/visualize.h new file mode 100755 index 0000000000..921924279b --- /dev/null +++ b/libs/ultrainfer/ultrainfer/vision/visualize/visualize.h @@ -0,0 +1,251 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "opencv2/imgproc/imgproc.hpp" +#include "ultrainfer/vision/common/result.h" +#include "ultrainfer/vision/tracking/pptracking/model.h" + +namespace ultrainfer { +/** \brief All C++ UltraInfer Vision Models APIs are defined inside this + * namespace + * + */ +namespace vision { + +class ULTRAINFER_DECL Visualize { +public: + static int num_classes_; + static std::vector color_map_; + static const std::vector &GetColorMap(int num_classes = 1000); + static cv::Mat VisDetection(const cv::Mat &im, const DetectionResult &result, + float score_threshold = 0.0, int line_size = 1, + float font_size = 0.5f); + static cv::Mat VisPerception(const cv::Mat &im, + const PerceptionResult &result, + const std::string &config_file, + float score_threshold = 0.0, int line_size = 1, + float font_size = 0.5f); + static cv::Mat VisFaceDetection(const cv::Mat &im, + const FaceDetectionResult &result, + int line_size = 1, float font_size = 0.5f); + static cv::Mat VisSegmentation(const cv::Mat &im, + const SegmentationResult &result); + static cv::Mat VisMattingAlpha(const cv::Mat &im, const MattingResult &result, + bool remove_small_connected_area = false); + static cv::Mat RemoveSmallConnectedArea(const cv::Mat &alpha_pred, + float threshold); + static cv::Mat + SwapBackgroundMatting(const cv::Mat &im, const cv::Mat &background, + const MattingResult &result, + bool remove_small_connected_area = false); + static cv::Mat SwapBackgroundSegmentation(const cv::Mat &im, + const cv::Mat &background, + int background_label, + const SegmentationResult &result); + static cv::Mat VisOcr(const cv::Mat &srcimg, const OCRResult &ocr_result); + static cv::Mat VisCURVEOcr(const cv::Mat &srcimg, + const OCRCURVEResult &ocr_result); +}; + +std::vector GenerateColorMap(int num_classes = 1000); +cv::Mat RemoveSmallConnectedArea(const cv::Mat &alpha_pred, float threshold); +/** \brief Show the visualized results for detection models + * + * \param[in] im the input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result the result produced by model + * \param[in] score_threshold threshold for result scores, the bounding box will + * not be shown if the score is less than score_threshold \param[in] line_size + * line size for bounding boxes \param[in] font_size font size for text \return + * cv::Mat type stores the visualized results + */ +ULTRAINFER_DECL cv::Mat VisDetection(const cv::Mat &im, + const DetectionResult &result, + float score_threshold = 0.0, + int line_size = 1, float font_size = 0.5f); +/** \brief Show the visualized results with custom labels for detection models + * + * \param[in] im the input image data, comes from cv::imread(), is a 3-D array + * with layout HWC, BGR format \param[in] result the result produced by model + * \param[in] labels the visualized result will show the bounding box contain + * class label \param[in] score_threshold threshold for result scores, the + * bounding box will not be shown if the score is less than score_threshold + * \param[in] line_size line size for bounding boxes + * \param[in] font_size font size for text + * \param[in] font_color font color for bounding text + * \param[in] font_thickness font thickness for text + * \return cv::Mat type stores the visualized results + */ +ULTRAINFER_DECL cv::Mat VisDetection( + const cv::Mat &im, const DetectionResult &result, + const std::vector &labels, float 
+    int line_size = 1, float font_size = 0.5f,
+    std::vector<int> font_color = {255, 255, 255}, int font_thickness = 1);
+
+/** \brief Show the visualized results for perception models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] config_file the path of the configuration file used by the model
+ * \param[in] score_threshold threshold for result scores, the bounding box will
+ * not be shown if the score is less than score_threshold
+ * \param[in] line_size line size for bounding boxes
+ * \param[in] font_size font size for text
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat
+VisPerception(const cv::Mat &im, const PerceptionResult &result,
+              const std::string &config_file, float score_threshold = 0.0,
+              int line_size = 1, float font_size = 0.5f);
+/** \brief Show the visualized results for classification models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] top_k the length of return values, e.g., if top_k==2, the result
+ * will include the 2 most likely class labels for the input image. \param[in]
+ * score_threshold threshold for top_k scores, the class will not be shown if
+ * the score is less than score_threshold \param[in] font_size font size \return
+ * cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisClassification(const cv::Mat &im,
+                                          const ClassifyResult &result,
+                                          int top_k = 5,
+                                          float score_threshold = 0.0f,
+                                          float font_size = 0.5f);
+/** \brief Show the visualized results with custom labels for classification
+ * models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] labels custom labels for the user, the visualized result will show
+ * the corresponding custom labels \param[in] top_k the length of return values,
+ * e.g., if top_k==2, the result will include the 2 most likely class labels for
+ * the input image. \param[in] score_threshold threshold for top_k scores,
+ * the class will not be shown if the score is less than score_threshold
+ * \param[in] font_size font size \return cv::Mat type stores the visualized
+ * results
+ */
+ULTRAINFER_DECL cv::Mat
+VisClassification(const cv::Mat &im, const ClassifyResult &result,
+                  const std::vector<std::string> &labels, int top_k = 5,
+                  float score_threshold = 0.0f, float font_size = 0.5f);
+/** \brief Show the visualized results for face detection models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] line_size line size for bounding boxes
+ * \param[in] font_size font size for text
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisFaceDetection(const cv::Mat &im,
+                                         const FaceDetectionResult &result,
+                                         int line_size = 1,
+                                         float font_size = 0.5f);
+/** \brief Show the visualized results for face alignment models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] line_size line size for circle point
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisFaceAlignment(const cv::Mat &im,
+                                         const FaceAlignmentResult &result,
+                                         int line_size = 1);
+/** \brief Show the visualized results for segmentation models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] weight transparency weight of the visualized result image
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisSegmentation(const cv::Mat &im,
+                                        const SegmentationResult &result,
+                                        float weight = 0.5);
+/** \brief Show the visualized results for matting models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \param[in] transparent_background if transparent_background==true, the
+ * background will be filled with a transparent color \param[in]
+ * transparent_threshold since the alpha value in MattingResult is a float
+ * between [0, 1], transparent_threshold is used to filter background pixels
+ * \param[in] remove_small_connected_area if remove_small_connected_area==true,
+ * the visualized result will not include the small connected areas \return
+ * cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisMatting(const cv::Mat &im,
+                                   const MattingResult &result,
+                                   bool transparent_background = false,
+                                   float transparent_threshold = 0.999,
+                                   bool remove_small_connected_area = false);
+/** \brief Show the visualized results for OCR models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] result the result produced by model
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result,
+                               const float score_threshold = 0);
+ULTRAINFER_DECL cv::Mat VisCURVEOcr(const cv::Mat &im,
+                                    const OCRCURVEResult &ocr_result,
+                                    const float score_threshold = 0);
+
+ULTRAINFER_DECL cv::Mat VisMOT(const cv::Mat &img, const MOTResult &results,
+                               float score_threshold = 0.0f,
+                               tracking::TrailRecorder *recorder = nullptr);
+/** \brief Swap the image background with MattingResult
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] background the background image data,
+ * comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+ * \param[in] result the MattingResult produced by model
+ * \param[in] remove_small_connected_area if remove_small_connected_area==true,
+ * the visualized result will not include the small connected areas \return
+ * cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat
+SwapBackground(const cv::Mat &im, const cv::Mat &background,
+               const MattingResult &result,
+               bool remove_small_connected_area = false);
+/** \brief Swap the image background with SegmentationResult
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] background the background image data,
+ * comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+ * \param[in] result the SegmentationResult produced by model
+ * \param[in] background_label the background label number in SegmentationResult
+ * \return cv::Mat type stores the visualized results
+ */
+ULTRAINFER_DECL cv::Mat SwapBackground(const cv::Mat &im,
+                                       const cv::Mat &background,
+                                       const SegmentationResult &result,
+                                       int background_label);
+
+/** \brief Show the visualized results for key point detection models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array
+ * with layout HWC, BGR format \param[in] results the result produced by model
+ * \param[in] conf_threshold threshold for result scores, the result will not be
+ * shown if the score is less than conf_threshold \return cv::Mat type stores
+ * the visualized results
+ */
+ULTRAINFER_DECL cv::Mat
+VisKeypointDetection(const cv::Mat &im, const KeyPointDetectionResult &results,
+                     float conf_threshold = 0.5f);
+ULTRAINFER_DECL cv::Mat VisHeadPose(const cv::Mat &im,
+                                    const HeadPoseResult &result, int size = 50,
+                                    int line_size = 1);
+
+} // namespace vision
+} // namespace ultrainfer
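The GenerateColorMap declared above produces a deterministic per-class palette by spreading the bits of the class id across the three channels (the familiar PASCAL-VOC-style scheme), so a given label always maps to the same color. A minimal usage sketch, assuming an UltraInfer build with these headers on the include path; the snippet is illustrative only:

#include <cstdio>
#include <vector>

#include "ultrainfer/vision/visualize/visualize.h"

int main() {
  // Request a palette for 20 classes; the implementation always allocates at
  // least 10 entries and stores three channel values per class id.
  std::vector<int> cmap = ultrainfer::vision::GenerateColorMap(20);
  for (int id = 0; id < 5; ++id) {
    // The three consecutive entries are the channel values for this class.
    std::printf("class %d -> (%d, %d, %d)\n", id, cmap[id * 3],
                cmap[id * 3 + 1], cmap[id * 3 + 2]);
  }
  return 0;
}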
diff --git a/libs/ultrainfer/ultrainfer/vision/visualize/visualize_pybind.cc b/libs/ultrainfer/ultrainfer/vision/visualize/visualize_pybind.cc
new file mode 100755
index 0000000000..b6ce5131b5
--- /dev/null
+++ b/libs/ultrainfer/ultrainfer/vision/visualize/visualize_pybind.cc
@@ -0,0 +1,256 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultrainfer/pybind/main.h"
+
+namespace ultrainfer {
+void BindVisualize(pybind11::module &m) {
+  m.def("vis_detection",
+        [](pybind11::array &im_data, vision::DetectionResult &result,
+           std::vector<std::string> &labels, float score_threshold,
+           int line_size, float font_size, std::vector<int> font_color,
+           int font_thickness) {
+          auto im = PyArrayToCvMat(im_data);
+          cv::Mat vis_im;
+          if (labels.empty()) {
+            vis_im = vision::VisDetection(im, result, score_threshold,
+                                          line_size, font_size);
+          } else {
+            vis_im = vision::VisDetection(im, result, labels, score_threshold,
+                                          line_size, font_size, font_color,
+                                          font_thickness);
+          }
+          FDTensor out;
+          vision::Mat(vis_im).ShareWithTensor(&out);
+          return TensorToPyArray(out);
+        })
+      .def("vis_perception",
+           [](pybind11::array &im_data, vision::PerceptionResult &result,
+              const std::string &config_file, float score_threshold,
+              int line_size, float font_size) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im =
+                 vision::VisPerception(im, result, config_file, score_threshold,
+                                       line_size, font_size);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_face_detection",
+           [](pybind11::array &im_data, vision::FaceDetectionResult &result,
+              int line_size, float font_size) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im =
+                 vision::VisFaceDetection(im, result, line_size, font_size);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_face_alignment",
+           [](pybind11::array &im_data, vision::FaceAlignmentResult &result,
+              int line_size) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisFaceAlignment(im, result, line_size);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_segmentation",
+           [](pybind11::array &im_data, vision::SegmentationResult &result,
+              float weight) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisSegmentation(im, result, weight);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("swap_background",
+           [](pybind11::array &im_data, pybind11::array &background_data,
+              vision::MattingResult &result, bool remove_small_connected_area) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             cv::Mat background = PyArrayToCvMat(background_data);
+             auto vis_im = vision::SwapBackground(im, background, result,
+                                                  remove_small_connected_area);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("swap_background",
+           [](pybind11::array &im_data, pybind11::array &background_data,
+              vision::SegmentationResult &result, int background_label) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             cv::Mat background = PyArrayToCvMat(background_data);
+             auto vis_im = vision::SwapBackground(im, background, result,
+                                                  background_label);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_ppocr",
+           [](pybind11::array &im_data, vision::OCRResult &result) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisOcr(im, result);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_ppocr_curve",
+           [](pybind11::array &im_data, vision::OCRCURVEResult &result) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisCURVEOcr(im, result);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_mot",
+           [](pybind11::array &im_data, vision::MOTResult &result,
+              float score_threshold, vision::tracking::TrailRecorder record) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisMOT(im, result, score_threshold, &record);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_matting",
+           [](pybind11::array &im_data, vision::MattingResult &result,
+              bool transparent_background, float transparent_threshold,
+              bool remove_small_connected_area) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisMatting(
+                 im, result, transparent_background, transparent_threshold,
+                 remove_small_connected_area);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_headpose",
+           [](pybind11::array &im_data, vision::HeadPoseResult &result,
+              int size, int line_size) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisHeadPose(im, result, size, line_size);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           });
+
+  pybind11::class_<vision::Visualize>(m, "Visualize")
+      .def(pybind11::init<>())
+      .def_static("vis_detection",
+                  [](pybind11::array &im_data, vision::DetectionResult &result,
+                     float score_threshold, int line_size, float font_size) {
+                    auto im = PyArrayToCvMat(im_data);
+                    auto vis_im = vision::Visualize::VisDetection(
+                        im, result, score_threshold, line_size, font_size);
+                    FDTensor out;
+                    vision::Mat(vis_im).ShareWithTensor(&out);
+                    return TensorToPyArray(out);
+                  })
+      .def_static(
+          "vis_keypoint_detection",
+          [](pybind11::array &im_data, vision::KeyPointDetectionResult &result,
+             float conf_threshold) {
+            auto im = PyArrayToCvMat(im_data);
+            auto vis_im =
+                vision::VisKeypointDetection(im, result, conf_threshold);
+            FDTensor out;
+            vision::Mat(vis_im).ShareWithTensor(&out);
+            return TensorToPyArray(out);
+          })
+      .def_static("vis_face_detection",
+                  [](pybind11::array &im_data,
+                     vision::FaceDetectionResult &result, int line_size,
+                     float font_size) {
+                    auto im = PyArrayToCvMat(im_data);
+                    auto vis_im = vision::Visualize::VisFaceDetection(
+                        im, result, line_size, font_size);
+                    FDTensor out;
+                    vision::Mat(vis_im).ShareWithTensor(&out);
+                    return TensorToPyArray(out);
+                  })
+      .def_static(
+          "vis_segmentation",
+          [](pybind11::array &im_data, vision::SegmentationResult &result) {
+            cv::Mat im = PyArrayToCvMat(im_data);
+            auto vis_im = vision::Visualize::VisSegmentation(im, result);
+            FDTensor out;
+            vision::Mat(vis_im).ShareWithTensor(&out);
+            return TensorToPyArray(out);
+          })
+      .def_static("swap_background_matting",
+                  [](pybind11::array &im_data, pybind11::array &background_data,
+                     vision::MattingResult &result,
+                     bool remove_small_connected_area) {
+                    cv::Mat im = PyArrayToCvMat(im_data);
+                    cv::Mat background = PyArrayToCvMat(background_data);
+                    auto vis_im = vision::Visualize::SwapBackgroundMatting(
+                        im, background, result, remove_small_connected_area);
+                    FDTensor out;
+                    vision::Mat(vis_im).ShareWithTensor(&out);
+                    return TensorToPyArray(out);
+                  })
+      .def_static("swap_background_segmentation",
+                  [](pybind11::array &im_data, pybind11::array &background_data,
+                     int background_label, vision::SegmentationResult &result) {
+                    cv::Mat im = PyArrayToCvMat(im_data);
+                    cv::Mat background = PyArrayToCvMat(background_data);
+                    auto vis_im = vision::Visualize::SwapBackgroundSegmentation(
+                        im, background, background_label, result);
+                    FDTensor out;
+                    vision::Mat(vis_im).ShareWithTensor(&out);
+                    return TensorToPyArray(out);
+                  })
.def_static("remove_small_connected_area", + [](pybind11::array &alpha_pred_data, float threshold) { + cv::Mat alpha_pred = PyArrayToCvMat(alpha_pred_data); + auto vis_im = vision::Visualize::RemoveSmallConnectedArea( + alpha_pred, threshold); + }) + .def_static("vis_ppocr", + [](pybind11::array &im_data, vision::OCRResult &result) { + auto im = PyArrayToCvMat(im_data); + auto vis_im = vision::Visualize::VisOcr(im, result); + FDTensor out; + vision::Mat(vis_im).ShareWithTensor(&out); + return TensorToPyArray(out); + }) + .def_static("vis_ppocr_curve", + [](pybind11::array &im_data, vision::OCRCURVEResult &result) { + auto im = PyArrayToCvMat(im_data); + auto vis_im = vision::Visualize::VisCURVEOcr(im, result); + FDTensor out; + vision::Mat(vis_im).ShareWithTensor(&out); + return TensorToPyArray(out); + }) + .def_static( + "vis_mot", + [](pybind11::array &im_data, vision::MOTResult &result, + float score_threshold, vision::tracking::TrailRecorder *record) { + auto im = PyArrayToCvMat(im_data); + auto vis_im = vision::VisMOT(im, result, score_threshold, record); + FDTensor out; + vision::Mat(vis_im).ShareWithTensor(&out); + return TensorToPyArray(out); + }) + .def_static("vis_matting_alpha", + [](pybind11::array &im_data, vision::MattingResult &result, + bool remove_small_connected_area) { + cv::Mat im = PyArrayToCvMat(im_data); + auto vis_im = vision::Visualize::VisMattingAlpha( + im, result, remove_small_connected_area); + FDTensor out; + vision::Mat(vis_im).ShareWithTensor(&out); + return TensorToPyArray(out); + }); +} +} // namespace ultrainfer