Bootstrap

tensorrt 安装和示例程序跑通记录

本机环境:

ubuntu18.04 cuda11.2

最终环境配置:cuda11.2,TensorRT8.4.1.5,cudnn8.2.4

1.下载TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz

https://developer.nvidia.com/nvidia-tensorrt-8x-download

2.tar安装
tar -xzvf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz

3.进入TensorRT-8.4.1.5/samples/sampleMNIST目录
cd TensorRT-8.4.1.5/samples/sampleMNIST

4.创建CMakeLists.txt
需要设置和本机实际对应的TensorRT安装路径
cmake_minimum_required(VERSION 3.13)
project(TensorRT_test LANGUAGES CXX)

# The TensorRT samples require C++11; REQUIRED prevents a silent fallback
# to an older standard on compilers that lack it.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Single root variable — edit this one line to match the directory where
# you extracted the TensorRT tarball on your machine.
set(TENSORRT_ROOT "/home/anktech/Storage/Meng/TensorRT-8.4.1.5")
set(TENSORRT_LIB_PATH "${TENSORRT_ROOT}/lib")

# The sample needs logger.cpp from the shared samples/common directory.
add_executable(TensorRT_test
  sampleMNIST.cpp
  "${TENSORRT_ROOT}/samples/common/logger.cpp"
)

# Target-scoped includes instead of directory-scoped include_directories():
# the paths attach to this target only and do not leak to other targets.
target_include_directories(TensorRT_test PRIVATE
  "${TENSORRT_ROOT}/include"
  "${TENSORRT_ROOT}/samples/common"
)

# The TensorRT tarball ships plain .so files with no CMake package config,
# so glob the shared libraries directly from its lib/ directory.
file(GLOB TENSORRT_LIBS "${TENSORRT_LIB_PATH}/*.so")

# Legacy FindCUDA module — adequate for this sample on CMake 3.13
# (the native CUDA language / CUDAToolkit package needs newer CMake).
find_package(CUDA REQUIRED)
message(STATUS "CUDA_LIBRARIES: ${CUDA_LIBRARIES}")
message(STATUS "CUDA_INCLUDE_DIRS: ${CUDA_INCLUDE_DIRS}")
target_include_directories(TensorRT_test PRIVATE ${CUDA_INCLUDE_DIRS})

# Explicit PRIVATE keyword — the keyword-less signature has legacy
# semantics and must not be mixed with the keyword form.
target_link_libraries(TensorRT_test PRIVATE
  ${TENSORRT_LIBS}
  ${CUDA_LIBRARIES}
)
5.添加环境变量(根据实际路径,也要把cudnn的库目录添加进去)
export LD_LIBRARY_PATH=/home/anktech/Storage/Meng/TensorRT-8.4.1.5/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/media/anktech/10E09BBBE09BA58E/3rd/cudnn-11.4-linux-x64-v8.2.4.15/lib64:$LD_LIBRARY_PATH

*这里cudnn需要下载并解压到你自己的目录

6.编译和运行
mkdir build && cd build
cmake ..
make
 
./TensorRT_test

 *cmake log

-- The C compiler identification is GNU 7.5.0
-- The CXX compiler identification is GNU 7.5.0
-- Check for working C compiler: /usr/bin/cc
-- Check for working C compiler: /usr/bin/cc - works
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Detecting C compile features
-- Detecting C compile features - done
-- Check for working CXX compiler: /usr/bin/c++
-- Check for working CXX compiler: /usr/bin/c++ - works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Looking for pthread.h
-- Looking for pthread.h - found
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed
-- Looking for pthread_create in pthreads
-- Looking for pthread_create in pthreads - not found
-- Looking for pthread_create in pthread
-- Looking for pthread_create in pthread - found
-- Found Threads: TRUE  
-- Found CUDA: /usr/local/cuda-11.2 (found version "11.2") 
CUDA_LIBRARIES:/usr/local/cuda-11.2/lib64/libcudart_static.a;Threads::Threads;dl;/usr/lib/x86_64-linux-gnu/librt.so
CUDA_INCLUDE_DIRS:/usr/local/cuda-11.2/include
-- Configuring done
-- Generating done
-- Build files have been written to: /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/build

*make log

Scanning dependencies of target TensorRT_test
[ 33%] Building CXX object CMakeFiles/TensorRT_test.dir/sampleMNIST.cpp.o
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferLegacyDims.h:16:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:16,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/buffers.h:20,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:27:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h: In member function ‘virtual bool nvinfer1::IGpuAllocator::deallocate(void*)’:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h:1450:26: warning: ‘virtual void nvinfer1::IGpuAllocator::free(void*)’ is deprecated [-Wdeprecated-declarations]
         this->free(memory);
                          ^
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h:1381:33: note: declared here
     TRT_DEPRECATED virtual void free(void* const memory) noexcept = 0;
                                 ^~~~
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/buffers.h:20:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:27:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h: At global scope:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:6258:88: warning: ‘IFullyConnectedLayer’ is deprecated [-Wdeprecated-declarations]
         ITensor& input, int32_t nbOutputs, Weights kernelWeights, Weights biasWeights) noexcept
                                                                                        ^~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:1443:22: note: declared here
 class TRT_DEPRECATED IFullyConnectedLayer : public ILayer
                      ^~~~~~~~~~~~~~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:6800:101: warning: ‘IRNNv2Layer’ is deprecated [-Wdeprecated-declarations]
         ITensor& input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen, RNNOperation op) noexcept
                                                                                                     ^~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:3281:22: note: declared here
 class TRT_DEPRECATED IRNNv2Layer : public ILayer
                      ^~~~~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp: In member function ‘bool SampleMNIST::build()’:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:142:47: warning: ‘void nvinfer1::IBuilder::setMaxBatchSize(int32_t)’ is deprecated [-Wdeprecated-declarations]
     builder->setMaxBatchSize(mParams.batchSize);
                                               ^
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/buffers.h:20:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:27:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:8795:25: note: declared here
     TRT_DEPRECATED void setMaxBatchSize(int32_t batchSize) noexcept
                         ^~~~~~~~~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp: In member function ‘bool SampleMNIST::infer()’:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:339:97: warning: ‘bool nvinfer1::IExecutionContext::enqueue(int32_t, void* const*, cudaStream_t, CUevent_st**)’ is deprecated [-Wdeprecated-declarations]
     if (!context->enqueue(mParams.batchSize, buffers.getDeviceBindings().data(), stream, nullptr))
                                                                                                 ^
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:17:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/buffers.h:20,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:27:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntime.h:1948:25: note: declared here
     TRT_DEPRECATED bool enqueue(
                         ^~~~~~~
[ 66%] Building CXX object CMakeFiles/TensorRT_test.dir/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logger.cpp.o
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logging.h:21:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logger.h:21,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logger.cpp:18:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h: In member function ‘virtual bool nvinfer1::IGpuAllocator::deallocate(void*)’:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h:1450:26: warning: ‘virtual void nvinfer1::IGpuAllocator::free(void*)’ is deprecated [-Wdeprecated-declarations]
         this->free(memory);
                          ^
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h:1381:33: note: declared here
     TRT_DEPRECATED virtual void free(void* const memory) noexcept = 0;
                                 ^~~~
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/sampleOptions.h:30:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logging.h:22,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logger.h:21,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logger.cpp:18:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h: At global scope:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:6258:88: warning: ‘IFullyConnectedLayer’ is deprecated [-Wdeprecated-declarations]
         ITensor& input, int32_t nbOutputs, Weights kernelWeights, Weights biasWeights) noexcept
                                                                                        ^~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:1443:22: note: declared here
 class TRT_DEPRECATED IFullyConnectedLayer : public ILayer
                      ^~~~~~~~~~~~~~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:6800:101: warning: ‘IRNNv2Layer’ is deprecated [-Wdeprecated-declarations]
         ITensor& input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen, RNNOperation op) noexcept
                                                                                                     ^~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:3281:22: note: declared here
 class TRT_DEPRECATED IRNNv2Layer : public ILayer
                      ^~~~~~~~~~~
[100%] Linking CXX executable TensorRT_test
[100%] Built target TensorRT_test

*运行结果

&&&& RUNNING TensorRT.sample_mnist [TensorRT v8401] # ./TensorRT_test
[07/01/2023-17:27:41] [I] Building and running a GPU inference engine for MNIST
[07/01/2023-17:27:42] [I] [TRT] [MemUsageChange] Init CUDA: CPU +314, GPU +0, now: CPU 320, GPU 844 (MiB)
[07/01/2023-17:27:42] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +207, GPU +68, now: CPU 544, GPU 912 (MiB)
[07/01/2023-17:27:42] [W] [TRT] The implicit batch dimension mode has been deprecated. Please create the network with NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag whenever possible.
[07/01/2023-17:27:43] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +398, GPU +166, now: CPU 944, GPU 1078 (MiB)
[07/01/2023-17:27:43] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +483, GPU +204, now: CPU 1427, GPU 1282 (MiB)
[07/01/2023-17:27:43] [W] [TRT] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.2.4
[07/01/2023-17:27:43] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
[07/01/2023-17:27:47] [I] [TRT] Detected 1 inputs and 1 output network tensors.
[07/01/2023-17:27:47] [I] [TRT] Total Host Persistent Memory: 8832
[07/01/2023-17:27:47] [I] [TRT] Total Device Persistent Memory: 0
[07/01/2023-17:27:47] [I] [TRT] Total Scratch Memory: 0
[07/01/2023-17:27:47] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 1 MiB, GPU 884 MiB
[07/01/2023-17:27:47] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 0.040401ms to assign 3 blocks to 11 nodes requiring 57860 bytes.
[07/01/2023-17:27:47] [I] [TRT] Total Activation Memory: 57860
[07/01/2023-17:27:47] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +0, GPU +4, now: CPU 0, GPU 4 (MiB)
[07/01/2023-17:27:47] [I] [TRT] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 1832, GPU 1425 (MiB)
[07/01/2023-17:27:47] [I] [TRT] Loaded engine size: 1 MiB
[07/01/2023-17:27:47] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +1, now: CPU 0, GPU 1 (MiB)
[07/01/2023-17:27:48] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +0, now: CPU 0, GPU 1 (MiB)
[07/01/2023-17:27:48] [I] Input:
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@%=#@@@@@%=%@@@@@@@@@@
@@@@@@@           %@@@@@@@@@
@@@@@@@           %@@@@@@@@@
@@@@@@@#:-#-.     %@@@@@@@@@
@@@@@@@@@@@@#    #@@@@@@@@@@
@@@@@@@@@@@@@    #@@@@@@@@@@
@@@@@@@@@@@@@:  :@@@@@@@@@@@
@@@@@@@@@%+==   *%%%%%%%%%@@
@@@@@@@@%                 -@
@@@@@@@@@#+.          .:-%@@
@@@@@@@@@@@*     :-###@@@@@@
@@@@@@@@@@@*   -%@@@@@@@@@@@
@@@@@@@@@@@*   *@@@@@@@@@@@@
@@@@@@@@@@@*   @@@@@@@@@@@@@
@@@@@@@@@@@*   #@@@@@@@@@@@@
@@@@@@@@@@@*   *@@@@@@@@@@@@
@@@@@@@@@@@*   *@@@@@@@@@@@@
@@@@@@@@@@@*   @@@@@@@@@@@@@
@@@@@@@@@@@*   @@@@@@@@@@@@@
@@@@@@@@@@@@+=#@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@

[07/01/2023-17:27:48] [I] Output:
0: 
1: 
2: 
3: 
4: 
5: 
6: 
7: **********
8: 
9: 

&&&& PASSED TensorRT.sample_mnist [TensorRT v8401] # ./TensorRT_test

;