Bootstrap

ubuntu20.04使用C++与TensorRT8.2对yolov8分类模型进行推理预测、适配多batch推理(附源码)

ubuntu20.04使用C++与TensorRT8.2对yolov8分类模型进行推理预测、并且能够适配多batch推理。

  • TensorRT版本 8.2.5.1
  • opencv版本 4.5.5
  • cuda版本 11.6

默认以上环境都已配置完成。
pt模型转onnx指令,自定义batch大小

yolo export model=best.pt format=onnx batch=1
yolo export model=best.pt format=onnx batch=4
yolo export model=best.pt format=onnx batch=8
yolo export model=best.pt format=onnx batch=16

onnx模型转engine指令(需要修改为自己安装的TensorRT路径)

/home/user/tools/TensorRT-8.2.5.1/bin/trtexec --onnx=best.onnx --workspace=4096 --fp16 --dumpLayerInfo --saveEngine=best_4.engine

上述onnx转engine过程耗时比较长(10分钟左右),如果等到终端打印&&&& PASSED字样,表示转换成功。

源码在附件中。
编译运行(需要自行修改相关配置参数)

mkdir build
cd build
cmake ..
make && ./color_classify
  • 头文件 ---- simple_color_classification.h
#pragma once
#include <ctime>
#include <fstream>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc.hpp>
#include "json.h"
// #include <json/json.h>   // use this include instead if the system jsoncpp package is installed (its header is json/json.h, not json.hpp)
#include "NvInfer.h"
#include <numeric>

using namespace std;
using namespace cv;
using namespace nvinfer1;

// Minimal TensorRT logger: forwards every message at or above the configured
// severity threshold to stderr and silently drops the rest.
class MyLogger : public nvinfer1::ILogger
{
public:
    explicit MyLogger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kWARNING) : severity_(severity) {}

    // Callback invoked by TensorRT; lower enum values mean higher severity.
    void log(nvinfer1::ILogger::Severity severity, const char *msg) noexcept override
    {
        // Guard clause: ignore messages below the threshold.
        if (severity > severity_)
        {
            return;
        }
        cerr << msg << endl;
    }
    nvinfer1::ILogger::Severity severity_;  // minimum severity that gets printed
};


// TensorRT-based yolov8 classification wrapper.
// Loads a serialized engine described by a JSON config, owns the CUDA
// device/host buffers and stream, and exposes a batched classify entry point.
class Color_classifier
{
public:
    string engine_path;          // path to the serialized TensorRT engine file
    string data_save_path;       // directory where annotated result images are saved

    // Engine loading / deserialization state
    MyLogger color_classification_trt_logger;        // logger handed to createInferRuntime
    nvinfer1::IExecutionContext *color_classification_trt_context;  // TensorRT execution context
    void *buffers[2];            // binding pointers: [0] = device input, [1] = device output
    cudaStream_t stream;         // CUDA stream for async H2D copy -> inference -> D2H copy
    //  Model input parameters (filled from binding 0 in the constructor)
    int input_size = 1;          // total input element count (N*C*H*W)
    size_t model_input_batch_size;
    size_t model_input_channels;
    size_t model_input_width;
    size_t model_input_height;
    float *d_input_buffer;       // device-side input buffer
    //  Model output parameters (filled from binding 1 in the constructor)
    int output_size = 1;         // total output element count (N * num_classes)
    size_t model_output_batch_size;
    size_t model_output_class_num;
    size_t model_output_target_size;   // NOTE(review): not used in the visible code
    size_t model_output_target_num;    // NOTE(review): not used in the visible code
    size_t model_output_target_len;    // NOTE(review): not used in the visible code
    float *d_output_buffer, *output_buffer;  // device output + pinned-host output buffers

    // Result-image saving
    int save_data = 0;           // non-zero: write annotated images to data_save_path
    int data_count = 0;          // running counter used to name saved images

    // Class label names, index-aligned with the model's output logits
    vector<string> color_results;

public:
    // Parses the JSON config, deserializes the engine, allocates all buffers.
    Color_classifier(string color_classification_cfg_path);
    ~Color_classifier();
    // Batched entry point: expects exactly model_input_batch_size images,
    // returns one predicted class index per image (empty on size mismatch).
    vector<int> color_classification(vector<Mat> input_img);

    // Letterbox-resize one image to the model input size and convert BGR->RGB.
    Mat preprocessImg(Mat &img);
    // Runs the actual TensorRT inference on already-preprocessed images.
    vector<int> color_classification_infer(vector<Mat> input_img);
};

  • 头文件 ---- json.h (字数太多超出限制了,源码在附件中)
    如果你已经安装了jsoncpp,可以不需要该头文件,但是需要将simple_color_classification.h中的#include “json.h” 换成#include <json/json.h>(jsoncpp安装后提供的头文件是json/json.h,而不是json/json.hpp),并将CMakeLists.txt中jsoncpp相关的include_directories和target_link_libraries解开注释,还需要将add_executable中的src/jsoncpp.cpp注释。

  • 源文件 ---- simple_color_classification.cpp

#include "simple_color_classification.h"
// #include <omp.h>

// Constructor: parses the JSON configuration, loads and deserializes the
// TensorRT engine, queries the input/output binding shapes and allocates the
// device / pinned-host buffers used during inference.
//
// NOTE(review): the IRuntime and ICudaEngine created here are deliberately not
// destroyed — the execution context depends on the engine staying alive for
// the lifetime of this object.
Color_classifier::Color_classifier(string color_classification_cfg_path)
{
    cout << "***********************    Init Color_classifier   "
            "***********************color_classification_cfg_path ="
         << color_classification_cfg_path << endl;
    if (color_classification_cfg_path.empty())
    {
        // Fall back to a default config file name next to the executable.
        color_classification_cfg_path = "color_classification_cfg_path.json";
        std::cerr << "not found color_classification_cfg_path.json path in "
                     "config file !"
                  << std::endl;
    }
    // ---- Parse the configuration file -------------------------------------
    std::ifstream inconfig(color_classification_cfg_path, std::ios::binary);
    if (!inconfig.is_open())
    {
        std::cerr << "fail to open color classification config" << std::endl;
        return;
    }
    Json::Reader reader;
    Json::Value color_classification_cfg;
    if (!reader.parse(inconfig, color_classification_cfg))
    {
        std::cerr << "decoder fail to parse config" << std::endl;
    }
    inconfig.close();

    // Engine (model) path
    if (!color_classification_cfg.isMember("engine_path"))
    {
        std::cerr << "not found engine path in config file !" << std::endl;
    }
    else
    {
        engine_path = color_classification_cfg["engine_path"].asString();
    }

    // Class label names, in model output order
    if (!color_classification_cfg.isMember("color_names"))
    {
        std::cerr << "not found color_names in config file !" << std::endl;
    }
    else
    {
        for (Json::ArrayIndex i = 0;
             i < color_classification_cfg["color_names"].size(); i++)
        {
            this->color_results.push_back(
                color_classification_cfg["color_names"][i].asString());
        }
    }

    // Whether to save annotated result images
    if (!color_classification_cfg.isMember("save_data"))
    {
        cout << "save_data is not found in config file .........\n";
    }
    else
    {
        this->save_data = color_classification_cfg["save_data"].asBool();
    }

    // Directory where result images are written
    if (!color_classification_cfg.isMember("data_save_path"))
    {
        std::cerr << "not found save path in config file !" << std::endl;
    }
    else
    {
        this->data_save_path =
            color_classification_cfg["data_save_path"].asString();
    }

    // ---- Load the serialized engine ---------------------------------------
    // Open in binary mode: the engine is a binary blob and text-mode reads
    // could corrupt it on some platforms.
    std::ifstream ifs(engine_path, std::ios::binary);
    if (!ifs.is_open())
    {
        std::cerr << "fail to open engine file: " << engine_path << std::endl;
        return;
    }
    stringstream engine_file_stream;
    engine_file_stream << ifs.rdbuf(); // slurp the whole file into the stream
    ifs.close();

    // Measure the stream length, then rewind for reading.
    engine_file_stream.seekg(0, std::ios::end);
    const int model_size = engine_file_stream.tellg();
    engine_file_stream.seekg(0, std::ios::beg);
    if (model_size <= 0)
    {
        // Guard against malloc(-1)/malloc(0) when the file was empty or the
        // stream is in a failed state (tellg() returns -1 on failure).
        std::cerr << "engine file is empty or unreadable: " << engine_path
                  << std::endl;
        return;
    }
    void *model_mem = malloc(model_size); // temporary buffer for the raw blob
    engine_file_stream.read(static_cast<char *>(model_mem), model_size);

    nvinfer1::IRuntime *runtime =
        nvinfer1::createInferRuntime(color_classification_trt_logger);
    nvinfer1::ICudaEngine *engine =
        runtime->deserializeCudaEngine(model_mem, model_size);
    free(model_mem);
    if (engine == nullptr)
    {
        // Typical causes: engine built with a different TensorRT version, or
        // the file is not a serialized engine at all.
        std::cerr << "failed to deserialize engine: " << engine_path
                  << std::endl;
        return;
    }
    color_classification_trt_context = engine->createExecutionContext();

    // Binding indices: the exported classification engine has a single input
    // (index 0) and a single output (index 1).
    const int inputIndex = 0;
    const int outputIndex = 1;

    // Input binding shape, laid out NCHW.
    Dims in_shape =
        color_classification_trt_context->getBindingDimensions(inputIndex);
    model_input_batch_size = in_shape.d[0];
    model_input_channels = in_shape.d[1];
    model_input_width = in_shape.d[3];
    model_input_height = in_shape.d[2];

    cout << "model_input_batch_size=" << model_input_batch_size
         << " model_input_channels=" << model_input_channels
         << " model_input_width=" << model_input_width
         << " model_input_height=" << model_input_height << endl;

    // Output binding shape: [batch, num_classes].
    Dims out_shape =
        color_classification_trt_context->getBindingDimensions(outputIndex);
    model_output_batch_size = out_shape.d[0];
    model_output_class_num = out_shape.d[1];

    cout << "model_output_batch_size=" << model_output_batch_size
         << "model_output_class_num=" << model_output_class_num << endl;

    // Total input element count; negative (dynamic) dims are folded in by
    // absolute value, preserving the original behaviour.
    nvinfer1::Dims input_dim = engine->getBindingDimensions(inputIndex);
    for (int j = 0; j < input_dim.nbDims; ++j)
    {
        if (input_dim.d[j] < 0)
            input_size *= -input_dim.d[j];
        else
            input_size *= input_dim.d[j];
    }
    cudaMalloc((void **)&d_input_buffer, input_size * sizeof(float));
    buffers[0] = d_input_buffer; // binding 0: device input

    // Total output element count and the matching device + pinned-host buffers.
    nvinfer1::Dims output_dim = engine->getBindingDimensions(outputIndex);
    for (int j = 0; j < output_dim.nbDims; ++j)
    {
        if (output_dim.d[j] < 0)
            output_size *= -output_dim.d[j];
        else
            output_size *= output_dim.d[j];
    }
    cudaMalloc((void **)&d_output_buffer, output_size * sizeof(float));
    cudaMallocHost((void **)&output_buffer, output_size * sizeof(float));
    buffers[1] = d_output_buffer; // binding 1: device output

    // Stream used for the async H2D copy -> inference -> D2H copy pipeline.
    cudaStreamCreate(&stream);
}

// Destructor: releases the CUDA stream, the device/pinned-host buffers and
// the TensorRT execution context created in the constructor.
Color_classifier::~Color_classifier()
{
    cudaStreamDestroy(stream);
    // The buffer frees are independent of each other; order is arbitrary.
    cudaFree(d_input_buffer);
    cudaFree(d_output_buffer);
    cudaFreeHost(output_buffer);
    // IExecutionContext has a public destructor in TensorRT 8.x.
    delete color_classification_trt_context;
}

// Letterbox-resize `img` to exactly model_input_width x model_input_height:
// scale preserving aspect ratio, pad the borders with gray (114,114,114),
// then convert BGR -> RGB. Returns the padded RGB image.
Mat Color_classifier::preprocessImg(Mat &img)
{
    cv::Mat resize_image;
    // Uniform scale factor that fits the image inside the model input size.
    float _ratio = std::min(model_input_width / (img.cols * 1.0f),
                            model_input_height / (img.rows * 1.0f));

    // Size of the scaled image content.
    int border_width = img.cols * _ratio;
    int border_height = img.rows * _ratio;

    // Padding: compute left/top and right/bottom independently. The original
    // code used the same (total/2) value on both sides, which left the padded
    // image one pixel short of the model input size whenever the total
    // padding was odd.
    int x_offset = (model_input_width - border_width) / 2;
    int y_offset = (model_input_height - border_height) / 2;
    int right_pad = model_input_width - border_width - x_offset;
    int bottom_pad = model_input_height - border_height - y_offset;

    // Scale the input, then pad to the exact model input size.
    cv::resize(img, resize_image, cv::Size(border_width, border_height));
    cv::copyMakeBorder(resize_image, resize_image, y_offset, bottom_pad,
                       x_offset, right_pad, cv::BORDER_CONSTANT,
                       cv::Scalar(114, 114, 114));
    // Model expects RGB channel order.
    cv::cvtColor(resize_image, resize_image, cv::COLOR_BGR2RGB);
    return resize_image;
}

// Runs TensorRT inference on a batch of already-preprocessed (letterboxed,
// RGB) images and returns the argmax class index for each image.
// Expects input_img.size() >= model_input_batch_size.
vector<int> Color_classifier::color_classification_infer(
    vector<Mat> input_img)
{
    // Host-side staging buffer in planar NCHW layout. std::vector gives RAII:
    // the previous raw new[]/delete[] pair leaked if anything threw in between.
    const size_t plane = model_input_width * model_input_height;
    std::vector<float> input_blob(model_input_batch_size *
                                  model_input_channels * plane);

    // HWC (interleaved, already RGB from preprocessImg) -> CHW planes,
    // scaled to [0,1]. The channel order must match the export layout or the
    // predictions will be wrong.
    for (size_t img = 0; img < model_input_batch_size; img++)
    {
        for (size_t i = 0; i < plane; i++)
        {
            for (size_t j = 0; j < model_input_channels; j++)
            {
                // at<Vec3b>(i) indexes the (continuous) Mat as a flat pixel
                // array; j selects the channel within the pixel.
                input_blob[img * model_input_channels * plane + j * plane + i] =
                    input_img[img].at<cv::Vec3b>(i)[j] / 255.0f;
            }
        }
    }

    // Copy input to the device (async on our stream).
    cudaMemcpyAsync(buffers[0], input_blob.data(), input_size * sizeof(float),
                    cudaMemcpyHostToDevice, stream);

    // Enqueue the inference.
    if (color_classification_trt_context->enqueueV2(buffers, stream, nullptr))
    {
        cout << "执行推理成功" << endl;
    }
    else
    {
        cout << "执行推理失败" << endl;
    }
    // Copy the logits back to the pinned host buffer.
    cudaMemcpyAsync(output_buffer, buffers[1], output_size * sizeof(float),
                    cudaMemcpyDeviceToHost, stream);

    // Wait for the copy->infer->copy pipeline to finish before reading.
    cudaStreamSynchronize(stream);

    // Argmax over each sample's logits, directly on the host buffer (no
    // per-sample vector copy needed).
    vector<int> label_indexs;
    label_indexs.reserve(model_input_batch_size);
    for (size_t i = 0; i < model_input_batch_size; i++)
    {
        const float *first = output_buffer + i * model_output_class_num;
        const float *last = first + model_output_class_num;
        label_indexs.push_back(static_cast<int>(
            std::distance(first, std::max_element(first, last))));
    }
    return label_indexs;
}

// Public entry point: preprocesses a full batch of images and runs inference.
// Requires exactly model_input_batch_size images; on mismatch it warns and
// returns an empty vector (previously it failed silently).
vector<int> Color_classifier::color_classification(vector<Mat> input_img)
{
    vector<Mat> per_img;
    vector<int> label_indexs;
    if (input_img.size() != this->model_input_batch_size)
    {
        std::cerr << "color_classification: expected "
                  << this->model_input_batch_size << " images but got "
                  << input_img.size() << ", skipping batch" << std::endl;
        return label_indexs;
    }
    per_img.reserve(this->model_input_batch_size);
    for (size_t i = 0; i < this->model_input_batch_size; i++)
    {
        // Letterbox + BGR->RGB each image before building the input blob.
        per_img.push_back(preprocessImg(input_img[i]));
    }
    label_indexs = color_classification_infer(per_img);
    return label_indexs;
}

  • 源文件 ---- json.cpp (字数太多超出限制了,源码在附件中)
    如果你已经安装了jsoncpp,可以不需要该源文件,但是需要将simple_color_classification.h中的#include “json.h” 换成#include <json/json.h>(jsoncpp安装后提供的头文件是json/json.h,而不是json/json.hpp),并将CMakeLists.txt中jsoncpp相关的include_directories和target_link_libraries解开注释,还需要将add_executable中的src/jsoncpp.cpp注释。

  • 源文件 ---- test.cpp

#include "simple_color_classification.h"

using namespace std;
using namespace cv;

int main()
{
    Color_classifier Color_classifier("../cfg/color_classification_cfg_path.json");

    // 测试图片文件路径
    std::string pic_filepath = "/test_data";

    std::vector<cv::String> pic_filenames;
    cv::glob(pic_filepath + "/*.jpg", pic_filenames);
    
    for (int i = 0; i < int(pic_filenames.size() / Color_classifier.model_input_batch_size); i++)
    {
        vector<Mat> input_imgs;
        vector<int> labels;
        for (int j = 0; j < Color_classifier.model_input_batch_size; j++)
        {
            Mat img = imread(pic_filenames[i * Color_classifier.model_input_batch_size + j]);
            input_imgs.push_back(img);
        }
        labels = Color_classifier.color_classification(input_imgs);

        // 如果需要保存文件
        if (Color_classifier.save_data)
        {
            for (int j = 0; j < Color_classifier.model_input_batch_size; j++)
            {
                cv::putText(input_imgs[j], Color_classifier.color_results[labels[j]], cv::Point(9,18), cv::FONT_ITALIC, 1, cv::Scalar(0, 0, 0), 2);
                cv::imwrite(Color_classifier.data_save_path + to_string(Color_classifier.data_count) + "_color_classify.jpg", input_imgs[j]);
                Color_classifier.data_count++;
            }
        }

        input_imgs.clear();
        labels.clear();
    }

    return 0;
}

  • 配置文件 ---- color_classification_cfg_path.json
{
    "engine_path" : "/home/user/color_classify/model/best_4.engine", // engine文件路径
    "save_data" : 0,   // 是否保存图片
    "data_save_path" : "/home/mec/wushuang/color_classification/data_res/",// 保存图片路径
    "color_names" : ["black","blue","bluegreen","brown","champagne","darkred","green","grey","orange","pink","purple","silver","white","yellow"]  //颜色类别

}
  • CMakeLists.txt
cmake_minimum_required(VERSION 3.0)
project(color_classify)

# C++11 via the standard CMake variables; the old add_definitions(-std=c++11)
# duplicated CMAKE_CXX_STANDARD and is not needed.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_BUILD_TYPE Release)

# NOTE: option() takes (name "help text" default). The original
# `option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)` passed OFF as the help string,
# so the variable was never actually set to OFF.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static CUDA runtime" OFF)

include_directories("include/")

# Fail at configure time (not compile time) if OpenCV is missing.
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS} 
                    /home/user/TensorRT-8.2.5.1/include 
                    # /usr/include/jsoncpp/
)

# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)

find_package(CUDA REQUIRED)
message(STATUS " libraries: ${CUDA_LIBRARIES}")
message(STATUS " include path: ${CUDA_INCLUDE_DIRS}")
include_directories(${CUDA_INCLUDE_DIRS})
enable_language(CUDA)

# find_package(OpenMP)
# if (OPENMP_FOUND)
#     set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
#     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
# endif()

add_executable(color_classify
               src/simple_color_classification.cpp
               src/test.cpp
               src/jsoncpp.cpp
               )

target_link_libraries(color_classify nvinfer)
target_link_libraries(color_classify nvonnxparser)
# Use the libraries discovered by find_package(CUDA) instead of a hard-coded
# /usr/local/cuda-11.6 path, so the build works with any installed version.
target_link_libraries(color_classify ${CUDA_LIBRARIES})
target_link_libraries(color_classify ${OpenCV_LIBS} 
# jsoncpp
)