
TensorRT C++ Inference for YOLOv5

This post walks through running YOLO inference in C++ with TensorRT.

TensorRT installation

TensorRT download link: choose the version that matches your machine's CUDA version. After extracting the archive, complete the following steps.

1. Copy the header files from TensorRT-8.2.2.1\include to C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include

2. Copy all .lib files from TensorRT-8.2.2.1\lib to C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\lib\x64

3. Copy all .dll files from TensorRT-8.2.2.1\lib to C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin

Verify the installation

Go into TensorRT-8.2.2.1/samples/sampleMNIST and open sample_mnist.sln with VS2017.

In the project properties, set the include directory (D:\tensorrt\include) and the library directory (D:\tensorrt\lib).

Project Properties -> Linker -> Input -> Additional Dependencies -> add the file names of the TensorRT .lib files.

Rebuild the solution for sample_mnist.sln and run it. If a digit-like pattern is printed, TensorRT is configured correctly.

OpenCV installation

Detailed installation tutorial 1
Detailed installation tutorial 2
The two tutorials above require you to build OpenCV yourself with CMake.
This one is a prebuilt package from GitHub; just download it, extract it, and add it to the system environment variables.
This one covers installation on Ubuntu.

Converting .pt to ONNX, and ONNX to engine

# YOLOv8 export code
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
model.export(format='onnx')

For YOLOv5, just export with the official script, as shown below.
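For reference, a typical invocation of the official export script (export.py in the YOLOv5 repo; exact flags depend on the repo version):

python export.py --weights yolov5s.pt --include onnx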

Open the ONNX file with Netron to inspect the model's inputs and outputs; for yolov5s at 640×640 the input is 1×3×640×640 and the output is 1×25200×85, which are the shapes assumed by the post-processing code below.

Use TensorRT's trtexec tool to convert the ONNX file to an engine.


Look up the other trtexec parameters as needed.

trtexec.exe --onnx=yolov5s.onnx --saveEngine=yolov5s.engine
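For example, to build a half-precision engine (assuming the GPU supports FP16), add the --fp16 flag:

trtexec.exe --onnx=yolov5s.onnx --saveEngine=yolov5s_fp16.engine --fp16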


CMakeLists.txt

cmake_minimum_required (VERSION 3.8)
project (test)
# opencv
set(OpenCV_DIR "D:/opencv4.7/build")
find_package(OpenCV REQUIRED)
# tensorrt
include_directories("D:/TensorRT-8.2.3.0/include")
link_directories("D:/TensorRT-8.2.3.0/lib")
include_directories("D:/TensorRT-8.2.3.0/samples/common")
# cuda
include_directories("D:/CUDA/NVIDIA GPU Computing Toolkit/CUDA/v11.3/include")
link_directories("D:/CUDA/NVIDIA GPU Computing Toolkit/CUDA/v11.3/lib")
# executable (logger.cpp from the TensorRT samples provides the Logger class,
# so the samples/common code is compiled in directly rather than linked)
add_executable(test "main.cpp" "main.h" "D:/TensorRT-8.2.3.0/samples/common/logger.cpp")
# link libraries
target_link_libraries(test ${OpenCV_LIBS})
target_link_libraries(test nvinfer)
target_link_libraries(test cudart)
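To configure and build, a typical invocation (the generator name assumes VS2017, as in the TensorRT sample above):

mkdir build
cd build
cmake .. -G "Visual Studio 15 2017" -A x64
cmake --build . --config Release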

Main function, yolo.cpp
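The snippets below use a few definitions that live in main.h in the original project, which is not shown here. A minimal sketch of plausible contents, assuming a standard yolov5s model at 640×640; the names BATCH_SIZE, INPUT_H, INPUT_W, OUTPUT_SIZE and the CHECK macro are the ones the code below expects:

#include <cuda_runtime_api.h>
#include <cstdlib>
#include <iostream>

static const int BATCH_SIZE  = 1;
static const int INPUT_H     = 640;
static const int INPUT_W     = 640;
static const int OUTPUT_SIZE = 25200 * 85;  // yolov5s: 25200 candidate boxes x 85 values

// Abort with a message on any CUDA runtime error
#define CHECK(call)                                                       \
	do {                                                                  \
		cudaError_t status_ = (call);                                     \
		if (status_ != cudaSuccess) {                                     \
			std::cerr << "CUDA error: " << cudaGetErrorString(status_)    \
				<< " at " << __FILE__ << ":" << __LINE__ << std::endl;    \
			std::abort();                                                 \
		}                                                                 \
	} while (0)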

int main() {

	int DLACore = -1;  // -1 = do not use a DLA core (setDLACore is skipped below)

	// Deserialize the engine from disk; filename points to the .engine file built with trtexec
	nvinfer1::ICudaEngine* engine = load_engine_file2(filename, DLACore);

	if (!engine) { return -1; }

	nvinfer1::IExecutionContext* context = engine->createExecutionContext();

	// One device buffer per binding: buffers[0] = input, buffers[1] = output
	std::vector<void*> buffers;
	buffers.resize(2);

	CHECK(cudaMalloc(&buffers[0], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
	CHECK(cudaMalloc(&buffers[1], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));

	cudaStream_t stream;
	CHECK(cudaStreamCreate(&stream));

	// Read the image and fill data_model with the normalized CHW tensor
	float* data_model = new float[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
	cv::Mat img = cv::imread(file_name);
	proprecess(img, INPUT_H, INPUT_W, data_model);

	size_t nInputSize = BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float);
	CHECK(cudaMemcpyAsync(buffers[0], data_model, nInputSize, cudaMemcpyHostToDevice, stream));

	// Fix the input shape (needed when the engine was built with a dynamic batch axis)
	context->setBindingDimensions(0, nvinfer1::Dims4(BATCH_SIZE, 3, INPUT_H, INPUT_W));

	context->executeV2(buffers.data());

	// Copy the raw predictions back to the host
	float* prob_model = new float[BATCH_SIZE * OUTPUT_SIZE];
	CHECK(cudaMemcpyAsync(prob_model, buffers[1], BATCH_SIZE * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
	cudaStreamSynchronize(stream);

	// ... post-processing and drawing (steps 8-15 below) go here ...

	// Cleanup
	delete[] data_model;
	delete[] prob_model;
	cudaStreamDestroy(stream);
	CHECK(cudaFree(buffers[0]));
	CHECK(cudaFree(buffers[1]));

	return 0;
}

Image preprocessing

cv::Mat letterboxs(cv::Mat image) {
	int x = 0, y = 0, w = 0, h = 0;
	// Compute the scale factors and decide which side (height or width) limits the resize.
	float rh = INPUT_H / (image.rows * 1.0);
	float rw = INPUT_W / (image.cols * 1.0);
	// Compute the scaled width w and height h, and the offset (x, y) inside the target image.
	if (rh > rw) {
		w = INPUT_W;
		h = image.rows * rw;
		x = 0;
		y = (INPUT_H - h) / 2;
	}
	else {
		w = image.cols * rh;
		h = INPUT_H;
		y = 0;
		x = (INPUT_W - w) / 2;
	}
	// Create a gray INPUT_W x INPUT_H target image (cv::Mat takes rows, i.e. height, first).
	cv::Mat letterbox(INPUT_H, INPUT_W, image.type(), cv::Scalar(128, 128, 128));
	// Resize the original image to (w, h) and copy it into the (x, y, w, h) region of letterbox.
	cv::resize(image, letterbox(cv::Rect(x, y, w, h)), cv::Size(w, h));
	return letterbox;
}
void proprecess(cv::Mat img, int h, int w, float* data) {
	cv::Mat pr_img = letterboxs(img);
	// HWC uint8 BGR -> CHW float RGB, normalized to [0, 1] (YOLOv5 expects RGB input).
	for (int i = 0; i < h * w; i++) {
		data[i] = pr_img.at<cv::Vec3b>(i)[2] / 255.0;             // R
		data[i + h * w] = pr_img.at<cv::Vec3b>(i)[1] / 255.0;     // G
		data[i + 2 * h * w] = pr_img.at<cv::Vec3b>(i)[0] / 255.0; // B
	}
}

Building the TensorRT engine

Loading the engine file

nvinfer1::ICudaEngine* load_engine_file2(std::string filename, int DLACore) {
	// Read the whole serialized engine into memory
	std::ifstream engine_file(filename, std::ios::binary);
	if (!engine_file) { return nullptr; }

	engine_file.seekg(0, engine_file.end);
	int fsize = engine_file.tellg();
	engine_file.seekg(0, engine_file.beg);

	std::vector<unsigned char> engine_data(fsize);
	engine_file.read(reinterpret_cast<char*>(engine_data.data()), fsize);

	// Deserialize with a runtime (the Logger class comes from the samples' logger.cpp)
	static Logger gLogger;
	nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger.getTRTLogger());

	if (DLACore != -1)
	{
		runtime->setDLACore(DLACore);
	}

	// TensorRT 8 dropped the third (plugin factory) parameter of deserializeCudaEngine
	return runtime->deserializeCudaEngine(engine_data.data(), fsize);
}
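As a quick sanity check after loading, the engine's binding shapes can be printed and compared with what Netron showed for the ONNX model (a sketch using the TensorRT 8.x binding API, placed in main right after load_engine_file2):

	for (int i = 0; i < engine->getNbBindings(); ++i) {
		nvinfer1::Dims d = engine->getBindingDimensions(i);
		std::cout << engine->getBindingName(i) << ":";
		for (int k = 0; k < d.nbDims; ++k) std::cout << " " << d.d[k];
		std::cout << std::endl;
	}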

Post-processing

	//  ========== 8. Reshape the raw output =========
	// 25200 candidate boxes, 85 values each: cx, cy, w, h, obj_conf, 80 class scores
	std::vector<std::vector<float>> prediction(25200, std::vector<float>(85));

	int index = 0;
	for (int i = 0; i < 25200; ++i) {
		for (int j = 0; j < 85; ++j) {
			prediction[i][j] = prob_model[index++];
		}
	}
	//  ========== 9. Keep rows whose objectness exceeds conf_thres =========
	std::vector<std::vector<float>> xc;
	for (const auto& row : prediction) {
		if (row[4] > conf_thres) {
			xc.push_back(row);
		}
	}
	//  ========== 10. confidence = obj_conf * cls_conf =========
	for (auto& row : xc) {
		for (std::size_t i = 5; i < row.size(); i++) {
			row[i] *= row[4];
		}
	}
	// ========== 11. Slice out xywh and convert to xyxy =========
	std::vector<std::vector<float>> xywh;
	for (const auto& row : xc) {
		std::vector<float> sliced_row(row.begin(), row.begin() + 4);
		xywh.push_back(sliced_row);
	}
	std::vector<std::vector<float>> box(xywh.size(), std::vector<float>(4, 0.0));
	xywhtoxxyy(xywh, box);
void xywhtoxxyy(std::vector<std::vector<float>> xywh, std::vector<std::vector<float>>& box) {
	// (cx, cy, w, h) -> (x1, y1, x2, y2):
	// x1 = cx - w/2, y1 = cy - h/2, x2 = cx + w/2, y2 = cy + h/2
	for (std::size_t i = 0; i < xywh.size(); ++i) {
		box[i][0] = xywh[i][0] - xywh[i][2] / 2;
		box[i][1] = xywh[i][1] - xywh[i][3] / 2;
		box[i][2] = xywh[i][0] + xywh[i][2] / 2;
		box[i][3] = xywh[i][1] + xywh[i][3] / 2;
	}
}
	// ========== 12. Take the best class score and its index =========
	std::size_t mi = xc[0].size();
	std::vector<float> conf(xc.size(), 0.0);
	std::vector<float> j(xc.size(), 0.0);

	for (std::size_t i = 0; i < xc.size(); ++i) {
		// Equivalent of the Python slice x[:, 5:mi]
		auto sliced_x = std::vector<float>(xc[i].begin() + 5, xc[i].begin() + mi);

		// Find the maximum class score
		auto max_it = std::max_element(sliced_x.begin(), sliced_x.end());

		// Its position within the slice is already the class id (class 0 sits at column 5)
		std::size_t max_index = std::distance(sliced_x.begin(), max_it);

		conf[i] = *max_it;
		j[i] = max_index;
	}
	// ========== 13. Concatenate x1, y1, x2, y2, score, class index =========
	for (std::size_t i = 0; i < xc.size(); i++) {
		box[i].push_back(conf[i]);
		box[i].push_back(j[i]);
	}

	std::vector<std::vector<float>> output;
	for (std::size_t i = 0; i < xc.size(); i++) {
		output.push_back(box[i]);
	}
	// ========== 14. Apply non-maximum suppression =========
	std::vector<BoundingBox> result = nonMaximumSuppression(output, overlapThreshold);
struct BoundingBox {
	float x1, y1, x2, y2, score, index;
};

float iou(const BoundingBox& box1, const BoundingBox& box2) {
	float max_x = max(box1.x1, box2.x1);  // larger of the two left edges
	float min_x = min(box1.x2, box2.x2);  // smaller of the two right edges
	float max_y = max(box1.y1, box2.y1);  // larger of the two top edges
	float min_y = min(box1.y2, box2.y2);  // smaller of the two bottom edges
	if (min_x <= max_x || min_y <= max_y) // no overlap
		return 0;
	float over_area = (min_x - max_x) * (min_y - max_y);  // intersection area
	float area_a = (box1.x2 - box1.x1) * (box1.y2 - box1.y1);
	float area_b = (box2.x2 - box2.x1) * (box2.y2 - box2.y1);
	float iou = over_area / (area_a + area_b - over_area);
	return iou;
}
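A quick sanity check of the formula with two hypothetical boxes: two 10×10 boxes offset by 5 in each direction overlap in a 5×5 region, so IoU = 25 / (100 + 100 - 25) ≈ 0.143:

	BoundingBox a{ 0, 0, 10, 10, 0.9f, 0 };
	BoundingBox b{ 5, 5, 15, 15, 0.8f, 0 };
	std::cout << iou(a, b) << std::endl;  // prints ~0.142857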

std::vector<BoundingBox> nonMaximumSuppression(std::vector<std::vector<float>>& boxes, float overlapThreshold) {
	std::vector<BoundingBox> convertedBoxes;

	// Convert the raw rows into BoundingBox structs
	for (const auto& box : boxes) {
		if (box.size() == 6) { // expecting [x1, y1, x2, y2, score, class]
			BoundingBox bbox;
			bbox.x1 = box[0];
			bbox.y1 = box[1];
			bbox.x2 = box[2];
			bbox.y2 = box[3];
			bbox.score = box[4];
			bbox.index = box[5];
			convertedBoxes.push_back(bbox);
		}
		else {
			std::cerr << "Invalid box format!" << std::endl;
		}
	}

	// Sort the boxes by score, descending
	std::sort(convertedBoxes.begin(), convertedBoxes.end(), [](const BoundingBox& a, const BoundingBox& b) {
		return a.score > b.score;
		});

	// Non-maximum suppression. Note this variant is class-agnostic: boxes of
	// different classes can suppress each other, unlike YOLOv5's per-class NMS.
	std::vector<BoundingBox> result;
	std::vector<bool> isSuppressed(convertedBoxes.size(), false);

	for (size_t i = 0; i < convertedBoxes.size(); ++i) {
		if (!isSuppressed[i]) {
			result.push_back(convertedBoxes[i]);

			for (size_t j = i + 1; j < convertedBoxes.size(); ++j) {
				if (!isSuppressed[j]) {
					float overlap = iou(convertedBoxes[i], convertedBoxes[j]);

					if (overlap > overlapThreshold) {
						isSuppressed[j] = true;
					}
				}
			}
		}
	}

	// Print the surviving boxes
	std::cout << "NMS Result:" << std::endl;
	for (const auto& box : result) {
		std::cout << "x1: " << box.x1 << ", y1: " << box.y1
			<< ", x2: " << box.x2 << ", y2: " << box.y2
			<< ", score: " << box.score << ", index: " << box.index << std::endl;
	}

	return result;
}

	// ========== 15. Draw the boxes =========
	for (auto& row : result) {

		// get_rect maps the box from letterbox coordinates back to the original image
		cv::Rect r = get_rect(r_image, row);

		double rounded_score = round(row.score * 100) / 100;
		std::string score_str = cv::format("%.2f", rounded_score); // format the score with two decimals

		cv::rectangle(r_image, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
		cv::putText(r_image, class_names[(int)row.index], cv::Point(r.x, r.y - 10), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0x27, 0xC1, 0x36), 1);
		cv::putText(r_image, score_str, cv::Point(r.x + 80, r.y - 10), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0x27, 0xC1, 0x36), 1);
	}

	cv::imshow("image", r_image);

	cv::waitKey(0);
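The helper get_rect is not shown in this post; a minimal sketch consistent with the letterboxs function above (an assumption, not the project's actual code) undoes the letterbox scale and padding to map a box back to the original image:

cv::Rect get_rect(const cv::Mat& img, const BoundingBox& box) {
	// Recover the scale and padding that letterboxs applied
	float r = std::min(INPUT_H / (float)img.rows, INPUT_W / (float)img.cols);
	float pad_x = (INPUT_W - img.cols * r) / 2.0f;
	float pad_y = (INPUT_H - img.rows * r) / 2.0f;
	// Undo the padding, then the scaling
	int x1 = (int)((box.x1 - pad_x) / r);
	int y1 = (int)((box.y1 - pad_y) / r);
	int x2 = (int)((box.x2 - pad_x) / r);
	int y2 = (int)((box.y2 - pad_y) / r);
	return cv::Rect(x1, y1, x2 - x1, y2 - y1);
}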

For the complete project code, send me a private message.

Test results
