本文使用的是pybind11来调用so和dll(pyd),实现0时间损失调用,以下会用yolov11 pose的tensorrt部署来举例
1.第一步下载pybind11
2.改cmakelists
(1)添加拉下来的pybind11-2.13的include
# Header search paths for every target in this directory.
# Order matters: earlier entries win when the same header name exists twice,
# so project-local include/ stays ahead of the bundled pybind11 copy.
include_directories(
${TensorRT_INCLUDES_DIRS} # TensorRT headers (NvInfer.h, ...) — variable name set by the project's find script
${CUDNN_INCLUDES} # cuDNN headers
${CUDA_INCLUDE_DIRS} # CUDA toolkit headers (cuda_runtime.h, NPP, ...)
${PROJECT_SOURCE_DIR}/include # this project's own headers (YoloPointDetection.h, ...)
${OpenCV_INCLUDE_DIRS} # OpenCV headers
${PROJECT_SOURCE_DIR}/third/pybind11-2.13/include # Include pybind11 headers directly
)
(2) 添加python的编译头文件
# Specify Anaconda environment's Python interpreter path.
# NOTE: Python3_EXECUTABLE alone is only a weak hint to FindPython3; also set
# Python3_ROOT_DIR so the Interpreter AND the Development artifacts (headers,
# pythonXY.lib) are resolved from the SAME conda environment instead of a
# system-wide Python that happens to be on PATH.
set(Python3_EXECUTABLE "D:/anaconda3/envs/pytorch/python.exe") # Replace with your actual path
set(Python3_ROOT_DIR "D:/anaconda3/envs/pytorch")              # Same env as above
# Find Python3 components: interpreter plus headers/libraries for building
# an extension module (Python3_INCLUDE_DIRS / Python3_LIBRARIES).
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
# Make Python.h visible to all targets in this directory.
include_directories(${Python3_INCLUDE_DIRS})
(3)链接头文件和源文件,并设置导出
# Collect source files
file(GLOB_RECURSE SOURCE_FILES src/*.cpp src/*.cu)
file(GLOB_RECURSE HEADER_FILES include/*.h include/*.cuh)
# Add pybind11 wrapper file
# Ensure that pybind_wrapper.cpp is included
list(APPEND SOURCE_FILES src/pybind_wrapper.cpp)
# Create Python module as shared library
add_library(CPDAI_module SHARED ${SOURCE_FILES} ${HEADER_FILES})
set_target_properties(CPDAI_module PROPERTIES
PREFIX "" # Remove default 'lib' prefix
SUFFIX ".pyd" # On Windows, Python modules have extension .pyd
RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/output" # Specify your desired output directory
RUNTIME_OUTPUT_NAME "CPDAI_module" # Name of the output file without extension
POSITION_INDEPENDENT_CODE ON # Ensure that the module is built with position-independent code
)
target_link_libraries(CPDAI_module PRIVATE
nvinfer
nvinfer_plugin
nvonnxparser
cudart
${OpenCV_LIBS}
nppc nppial nppicc nppidei nppif nppig nppim nppist nppisu nppitc
${Python3_LIBRARIES} # Link Python libraries
)
target_compile_definitions(CPDAI_module PRIVATE
DLL_API
)
3.输出文件的改写
这边我们的导出文件是 pybind_wrapper.cpp,它调用 Yolov11_detection 这个类,返回一个由 Box 结构体组成的列表
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/numpy.h>
#include <opencv2/opencv.hpp>
#include "YoloPointDetection.h"
namespace py = pybind11;
// Python bindings for the YOLOv11 pose detector.
// Exposes the plain `Box` result struct and the `Yolov11_detection` class,
// wrapping the predict call so it accepts a NumPy HxWx3 uint8 image and
// returns a Python list of Box.
PYBIND11_MODULE(CPDAI_module, m) {
    py::class_<Box>(m, "Box")
        .def(py::init<>())
        .def_readwrite("x1", &Box::x1)
        .def_readwrite("y1", &Box::y1)
        .def_readwrite("x2", &Box::x2)
        .def_readwrite("y2", &Box::y2)
        .def_readwrite("cls_conf", &Box::cls_conf)
        .def_readwrite("cls_id", &Box::cls_id)
        .def_readwrite("point1_x", &Box::point1_x)
        .def_readwrite("point1_y", &Box::point1_y)
        .def_readwrite("point1_conf", &Box::point1_conf)
        .def_readwrite("point2_x", &Box::point2_x)
        .def_readwrite("point2_y", &Box::point2_y)
        .def_readwrite("point2_conf", &Box::point2_conf);
    py::class_<Yolov11_detection>(m, "Yolov11_detection")
        .def(py::init<>())
        .def("init_model", &Yolov11_detection::init_model)
        // c_style|forcecast guarantees a dense, C-contiguous uint8 buffer:
        // non-contiguous or differently-typed arrays are converted instead of
        // being reinterpreted with wrong strides by the cv::Mat header below.
        .def("yolov11_Predict",
             [](Yolov11_detection &self,
                py::array_t<uint8_t, py::array::c_style | py::array::forcecast> img_array,
                float conf_thresh, float nms_thresh) {
            // Convert the NumPy array to a cv::Mat.
            py::buffer_info buf = img_array.request();
            if (buf.ndim != 3 || buf.shape[2] != 3) {
                throw std::runtime_error("输入图像必须是 3 通道彩色图像");
            }
            // Non-owning Mat header over the NumPy buffer (no copy yet).
            cv::Mat img(static_cast<int>(buf.shape[0]),
                        static_cast<int>(buf.shape[1]),
                        CV_8UC3, buf.ptr);
            // Deep copy so the detector never aliases Python-owned memory.
            cv::Mat img_copy = img.clone();
            std::vector<Box> boxes(25200); // capacity assumption: max anchors — TODO confirm vs engine output
            int final_box = 0; // initialized: if Predict bails early, resize(0) below is safe
            self.yolov11_Predict(img_copy, boxes, final_box, conf_thresh, nms_thresh);
            // Clamp to a valid range before shrinking to the detections kept after NMS.
            if (final_box < 0) final_box = 0;
            if (final_box > static_cast<int>(boxes.size())) final_box = static_cast<int>(boxes.size());
            boxes.resize(final_box);
            return boxes; // pybind11/stl.h converts to a Python list of Box
        }, py::arg("img"), py::arg("conf_thresh") = 0.5f, py::arg("nms_thresh") = 0.4f);
}
// One pose-detection result: an axis-aligned bounding box, a class score/id,
// and two keypoints (presumably pixel coordinates — confirm against the
// post-processing code). Members are zero-initialized so a default-constructed
// Box never exposes indeterminate values.
struct Box {
    float x1{0.f}, y1{0.f}, x2{0.f}, y2{0.f}; // box corners: (x1,y1) top-left, (x2,y2) bottom-right
    float cls_conf{0.f}; // class confidence
    int cls_id{0};       // class index
    float point1_x{0.f};    // keypoint 1 x
    float point1_y{0.f};    // keypoint 1 y
    float point1_conf{0.f}; // keypoint 1 confidence
    float point2_x{0.f};    // keypoint 2 x
    float point2_y{0.f};    // keypoint 2 y
    float point2_conf{0.f}; // keypoint 2 confidence
};
// TensorRT-backed YOLOv11 pose detector.
// NOTE(review): this listing is truncated — the closing "};" of the class is
// not part of the excerpt shown in the post.
// NOTE(review): the export macro here is spelled `DLL`, but the CMake snippet
// defines `DLL_API` — confirm the macro names actually match in the header.
class DLL Yolov11_detection
{
public:
Yolov11_detection();
~Yolov11_detection();
// Loads the model from `model_path` — presumably a serialized TensorRT engine; confirm.
void init_model(std::string model_path);
// Runs inference on `img`, filling `box` and setting `final_box_num` to the
// number of valid detections after the conf/NMS thresholds.
void yolov11_Predict(cv::Mat& img, std::vector<Box>& box, int& final_box_num, float conf_thresh, float nms_thresh);
// Converts between ONNX and TensorRT engine files (direction implied by args: onnx → trt; confirm).
void trt2onnx(std::string onnx_path, std::string trt_path);
4.如何调用
方法1:直接把输出的pyd和需要调用的dll(或者加环境变量)库放在python main函数同一文件夹下(linux直接放so文件就行),然后python直接import就行,如果编译器爆红也请大胆尝试跑,因为是正常的现象。
方法2:将pyd和dll文件放入anaconda3当前环境的libs文件夹下,这样能直接被anaconda3的Python解释器找到
import CPDAI_module

# Create the detector and load the engine ONCE, outside the image loop.
model = CPDAI_module.Yolov11_detection()
model.init_model(model_path)
for img_way in data_path:
    if not os.path.exists(img_way):
        continue  # skip missing files instead of crashing on imread
    # e.g. "frame_003.jpg" -> "003"; renamed from `slice`, which shadows a builtin
    slice_id = img_way.split('_')[-1].split('.')[0]
    img = cv2.imread(img_way)
    if img is None:
        continue  # unreadable/corrupt image: imread returns None, not an exception
    # Original snippet used `self.model`, but the object above is bound to `model`
    # at script scope — there is no `self` here.
    results = model.yolov11_Predict(img, conf_thresh=0.65, nms_thresh=0.7)
results 是一个 Box 对象的列表(对应 C++ 的 std::vector&lt;Box&gt;,由 pybind11/stl.h 自动转换),遍历后可以直接用 .xx 访问结构体字段,例如:
for result in results:
    cls = result.cls_id    # 获取类别
    conf = result.cls_conf # 获取置信度
总结:
为了解决一些框架无法直接调用 C++ 的问题,折腾了几天还是成功搞出来了,而且这个方法目前测试下来运行时间基本和纯 C++ 一致,有什么问题欢迎评论区讨论