I. Setup
Windows 10 + GTX 1050 Ti (4 GB VRAM)
CUDA 11.5
onnxruntime-gpu 1.11.1
TensorRT-8.6.1.6
Visual Studio 2019
.onnx model: input is float32 {1,1,N,3}; output is float32 {N,7}
II. Workflow
1. Make sure the ONNX model itself is correct and usable (a quick loadability check is sketched below).
2. Use the API provided by TensorRT to convert the ONNX model into a TRT engine.
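Before touching TensorRT, it is worth confirming that the ONNX model loads and reports the expected I/O shapes. A minimal sketch using the onnxruntime C++ API from the setup above; the function name CheckOnnxModel is illustrative, and error handling is reduced to the exception the session constructor throws:

#include <onnxruntime_cxx_api.h>
#include <iostream>
#include <vector>

// Open the model with onnxruntime and print the first input's shape.
// If the Ort::Session constructor throws, fix the model before converting it.
void CheckOnnxModel(const wchar_t* modelPath) {
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "onnx-check");
    Ort::SessionOptions opts;
    Ort::Session session(env, modelPath, opts); // throws Ort::Exception on a broken model
    Ort::AllocatorWithDefaultOptions alloc;
    char* inName = session.GetInputName(0, alloc);
    std::vector<int64_t> inShape = session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
    std::cout << "input '" << inName << "' dims:";
    for (int64_t d : inShape) std::cout << " " << d; // the dynamic N axis is reported as -1
    std::cout << std::endl;
    alloc.Free(inName);
}

For the model described above this should print: input 'input' dims: 1 1 -1 3.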
III. Code
#include <NvInfer.h>
#include <NvOnnxParser.h>
#include <iostream>
#include <fstream>
#include <string>
#include <stdexcept>
#include <cstdlib>
#include <clocale>

/*
* Logger used to record TensorRT log messages.
*/
class Logger : public nvinfer1::ILogger {
public:
    void log(Severity severity, const char* msg) noexcept override {
        using namespace std;
        string s;
        bool printMsg = true;
        switch (severity) {
        case Severity::kINTERNAL_ERROR:
            s = "INTERNAL_ERROR";
            break;
        case Severity::kERROR:
            s = "ERROR";
            break;
        case Severity::kWARNING:
            s = "WARNING";
            break;
        case Severity::kINFO:
            s = "INFO";
            printMsg = m_printVerbose;
            break;
        case Severity::kVERBOSE:
            s = "VERBOSE";
            printMsg = m_printVerbose;
            break;
        }
        if (printMsg)
            cout << s << ": " << msg << endl;
    }

    void setPrintVerbose(bool printVerbose) {
        this->m_printVerbose = printVerbose;
    }

private:
    bool m_printVerbose = true;
};
const wchar_t* model_path = L"..\\..\\..\\..\\model\\model.onnx";
Logger gLogger;
int ONNX2TensorRT(const wchar_t* onnxModelName)
{
    /*
    * 1. Read the ONNX model
    */
    setlocale(LC_ALL, ""); // use the environment's default locale so wcstombs can convert the wide-char path
    // query how many bytes the converted string needs
    size_t charCount = wcstombs(NULL, onnxModelName, 0);
    if (charCount == (size_t)-1) {
        std::cerr << "Path conversion failed" << std::endl;
        return EXIT_FAILURE;
    }
    char* modelName = new char[charCount + 1]; // allocate enough memory for the converted string
    wcstombs(modelName, onnxModelName, charCount); // perform the conversion
    modelName[charCount] = '\0'; // null-terminate the converted string
    // read the ONNX model file into a buffer
    std::ifstream fs(onnxModelName, std::ios_base::in | std::ios_base::binary); // MSVC's ifstream accepts a wide-char path directly
    if (!fs)
        throw std::runtime_error("ONNX model path error!");
    fs.seekg(0, std::ios::end);
    int size = (int)fs.tellg();
    fs.seekg(0, std::ios::beg);
    char* buffer = new char[size];
    fs.read(buffer, size);
    fs.close();
    /*
    * 2. Create the build environment
    */
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger); // create an IBuilder instance
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U << (unsigned)nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); // create a network definition; the explicit-batch flag is required for dynamic shapes
    nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger); // create an ONNX parser bound to the network
    if (!parser->parseWithWeightDescriptors(buffer, size)) { // parse the serialized ONNX model into the TensorRT network
        delete parser;
        delete network;
        delete builder;
        delete[] buffer;
        delete[] modelName;
        throw std::runtime_error("ERROR: could not parse ONNX model");
    }
    // create the builder config
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig(); // builder configuration object
    auto profile = builder->createOptimizationProfile(); // an optimization profile with setDimensions() calls is mandatory for dynamic input tensors
    profile->setDimensions("input",
        nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4{ 1, 1, 1000, 3 }); // minimum dimensions of the dynamic input
    profile->setDimensions("input",
        nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4{ 1, 1, 4096, 3 }); // optimum dimensions of the dynamic input
    profile->setDimensions("input",
        nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4{ 1, 1, 10000, 3 }); // maximum dimensions of the dynamic input
    config->addOptimizationProfile(profile); // register the profile with the config
    /*
    * 3. Build the engine
    */
    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config); // build an engine from the network and config (deprecated since 8.0 in favor of buildSerializedNetwork, but still available in 8.6)
    if (!engine) {
        throw std::runtime_error("ERROR: failed to build engine");
    }
    nvinfer1::IHostMemory* serializedModel = engine->serialize(); // serialize the engine into a host-memory blob
    /*
    * 4. Export the serialized engine
    */
    std::string tmp = modelName;
    size_t pos = tmp.find_last_of(".");
    std::string trtModelPath = tmp.substr(0, pos) + ".trt";
    std::ofstream serialize_output_stream(trtModelPath, std::ios_base::out | std::ios_base::binary);
    serialize_output_stream.write((char*)serializedModel->data(), serializedModel->size());
    serialize_output_stream.close();
    /*
    * 5. Cleanup
    */
    delete[] modelName;
    delete[] buffer;
    // destroy() has been deprecated since TensorRT 8.0; plain delete is the supported way to release these objects
    delete serializedModel;
    delete engine;
    delete parser;
    delete network;
    delete config;
    delete builder;
    return 0;
}
int main() {
    // Method 1: use the trtexec.exe tool shipped with TensorRT (assumed to be in the working directory)
    system(".\\trtexec.exe --onnx=model.onnx --minShapes=input:1x1x1000x3 --optShapes=input:1x1x4096x3 --maxShapes=input:1x1x10000x3 --saveEngine=model.trt");
    // Method 2: use the TensorRT C++ API
    ONNX2TensorRT(model_path);
    return 0;
}
Notes:
1. When converting with trtexec, the model has dynamic inputs, so the valid input range must be specified with the minShapes, optShapes, and maxShapes options.
2. When converting with the API, the dimensions must likewise be set through an optimization profile; this is the biggest difference from converting a static-input model. The concrete shape then has to be fixed again at inference time, as sketched below.
3. This article is aimed at readers with a C++ and deployment background, and doubles as the author's own notes; friendly feedback on any issues is welcome.
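To expand on note 2: once the .trt engine exists, the dynamic dimension must be pinned on the execution context before inference, because the point count N is only known at runtime. A minimal sketch, assuming the input tensor is binding index 0 and runtimeN lies inside the [1000, 10000] profile built above; the LoadTrtEngine name is illustrative, and buffer allocation, the actual enqueue, and cleanup are omitted:

#include <NvInfer.h>
#include <fstream>
#include <iterator>
#include <vector>

// Deserialize the generated .trt engine and fix the runtime input shape.
nvinfer1::IExecutionContext* LoadTrtEngine(const char* trtPath, int runtimeN) {
    std::ifstream fs(trtPath, std::ios_base::binary);
    std::vector<char> blob((std::istreambuf_iterator<char>(fs)), std::istreambuf_iterator<char>());
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(blob.data(), blob.size());
    nvinfer1::IExecutionContext* context = engine->createExecutionContext();
    // the engine was built with a dynamic {1,1,N,3} profile, so the concrete N
    // must be set before enqueueing (and again whenever the shape changes)
    context->setBindingDimensions(0, nvinfer1::Dims4{ 1, 1, runtimeN, 3 });
    return context;
}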