Bootstrap

C++下将onnx模型转化为TensorRT模型(动态输入输出)

一、配置简介

windows10 + GTX 1050Ti 4GB显存

cuda 11.5

onnxruntime-gpu 1.11.1

TensorRT-8.6.1.6

visual studio 2019

ONNX模型(.onnx):输入input为float32,形状为{1,1,N,3};输出output为float32,形状为{N,7},其中N为动态维度

二、流程梳理

1.确保onnx模型是正确可用的

2.根据TensorRT提供的API实现从onnx模型到trt模型的转化

三、代码

/*
* 用于记录TensorRT的相关操作日志
*/
class Logger : public nvinfer1::ILogger {
public:
	void log(Severity severity, const char* msg) noexcept override {
		using namespace std;
		string s;
		bool printMsg = true;
		switch (severity) {
		case Severity::kINTERNAL_ERROR:
			s = "INTERNAL_ERROR";
			break;
		case Severity::kERROR:
			s = "ERROR";
			break;
		case Severity::kWARNING:
			s = "WARNING";
			break;
		case Severity::kINFO:
			s = "INFO";
			printMsg = m_printVerbose;
			break;
		case Severity::kVERBOSE:
			s = "VERBOSE";
			printMsg = m_printVerbose;
			break;
		}
		if (printMsg)
			std::cout << s << ": " << msg << endl;
	}
public:
	void setPrintVerbose(bool printVerbose) {
		this->m_printVerbose = printVerbose;
	};

private:
	bool m_printVerbose = true;
};

// Relative path of the ONNX model to convert; main() passes it to ONNX2TensorRT().
const wchar_t* model_path = L"..\\..\\..\\..\\model\\model.onnx";
// Logger instance given to the TensorRT builder and the ONNX parser in ONNX2TensorRT().
Logger gLogger;

int ONNX2TensorRT(const wchar_t* onnxModelName)
{
	/*
	* 1.读取onnx模型
	*/
	setlocale(LC_ALL, "");			// 设置本地环境为UTF-8编码,以支持多字节字符

	// 获取所需转换的字符数
	int charCount = wcstombs(NULL, onnxModelName, 0);
	if (charCount == -1) {
		std::cerr << "转换失败" << std::endl;
		return EXIT_FAILURE;
	}

	char* modelName = new char[charCount + 1];			// 分配足够的内存来容纳转换后的字符串
	wcstombs(modelName, onnxModelName, charCount);		// 执行转换
	modelName[charCount] = '\0';						// null 终止转换后的字符串

	// 读取onnx模型文件至buffer中
	std::ifstream fs(onnxModelName, std::ios_base::in | std::ios_base::binary);
	if (!fs)
		throw "ONNX Model Path Error!";
	fs.seekg(0, std::ios::end);
	int size = (int)fs.tellg();
	fs.seekg(0, std::ios::beg);

	char* buffer = new char[size];
	fs.read(buffer, size);
	fs.close();

	/*
	* 2.创建构建build环境
	*/
	nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);				// Create an instance of an IBuilder class. 创建IBuilder实例
	nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U << (unsigned)nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);		// Create a network definition object. 其中CreateNetworkV2支持动态输入

	nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger);		// Create a new parser object. 创建onnx解析器对象

	if (!parser->parseWithWeightDescriptors(buffer, size)) {							// Parse a serialized ONNX model into the TensorRT network with consideration of user provided weights. 将序列化的onnx模型解析到tensorRT网络中
		parser->destroy();
		builder->destroy();
		throw std::runtime_error("ERROR: could not parse ONNX model ");
	}

	//创建config
	nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();					// Create a builder configuration object. 配置对象
	auto profile = builder->createOptimizationProfile();								// If the network has any dynamic input tensors, the appropriate calls to setDimensions() must be made. 若要实现动态输入,必须有此设置
	profile->setDimensions("input",
		nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4{ 1, 1, 1000, 3});			// Set the minimum dimensions for a dynamic input tensor.
	profile->setDimensions("input",
		nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4{ 1, 1, 4096, 3});			// Set the optimum dimensions for a dynamic input tensor.
	profile->setDimensions("input",
		nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4{ 1, 1, 10000, 3});			// Set the maximum dimensions for a dynamic input tensor.
	
	config->addOptimizationProfile(profile);											// Add an optimization profile. 优化配置加入配置

	/*
	* 3.构造引擎Engine
	*/
	nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);	// Builds and serializes a network for the given INetworkDefinition and IBuilderConfig. 构建引擎
	if (!engine) {
		throw std::runtime_error("ERROR: failed to build engine");
	}
	nvinfer1::IHostMemory* serializedModel = engine->serialize();						// Serialize the network to a stream. 将网络序列化为流
	

	/*
	* 4.序列化文件导出
	*/
	std::string tmp = modelName;
	size_t pos = tmp.find_last_of(".");
	std::string trtModelPath = tmp.substr(0, pos) + ".trt";

	std::ofstream serialize_output_stream(trtModelPath, std::ios_base::out | std::ios_base::binary);;
	serialize_output_stream.write((char*)serializedModel->data(), serializedModel->size());
	serialize_output_stream.close();

	/*
	* 5.空间清理
	*/
	delete[] modelName;
	delete[] buffer;
	serializedModel->destroy();
	engine->destroy();
	parser->destroy();
	network->destroy();
	config->destroy();
	builder->destroy();
	return 0;
}
/*
 * Demonstrates two ways of converting the ONNX model to a TensorRT engine:
 * the trtexec command-line tool and the TensorRT API (ONNX2TensorRT).
 * Both are always attempted; the exit code is EXIT_FAILURE if either fails.
 */
int main(){
    int exitCode = EXIT_SUCCESS;

    // Method 1: the trtexec.exe tool shipped with TensorRT.
    // The command runs through the Windows command interpreter, which does not
    // accept "./" as a path prefix, so the executable is referenced as ".\trtexec.exe".
    const int trtexecStatus = system(".\\trtexec.exe --onnx=model.onnx --minShapes=input:1x1x1000x3 --optShapes=input:1x1x4096x3 --maxShapes=input:1x1x10000x3 --saveEngine=model.trt");
    if (trtexecStatus != 0) {
        std::cerr << "trtexec conversion failed with status " << trtexecStatus << std::endl;
        exitCode = EXIT_FAILURE;
    }

    // Method 2: the TensorRT API.
    // ONNX2TensorRT reports failures both through its return value and by throwing.
    try {
        if (ONNX2TensorRT(model_path) != 0)
            exitCode = EXIT_FAILURE;
    }
    catch (const std::exception& e) {
        std::cerr << e.what() << std::endl;
        exitCode = EXIT_FAILURE;
    }
    catch (const char* msg) {
        // ONNX2TensorRT throws a string literal when the model file cannot be opened.
        std::cerr << msg << std::endl;
        exitCode = EXIT_FAILURE;
    }

    return exitCode;
}

注意事项:

1.使用trtexec程序进行转化时,由于模型是动态输入,所以必须通过minShapes、optShapes、maxShapes三个参数指定输入尺寸的范围。

2.使用API进行转化时,同样要通过优化配置(Optimization Profile)设置输入维度的最小、最优、最大值,这是与静态输入的最大差别所在。

3.本文面向有C++基础以及部署基础的朋友,也是作者自身备忘所用,有问题可以友好交流。

;