TensorRT deployment: converting an ONNX model to an engine/TRT model in code
- Prerequisite: the GPU driver, CUDA, cuDNN, and TensorRT are already installed
- Both a Python and a C++ conversion approach are given below
1. C++ implementation
- Configure the CUDA and TensorRT libraries in the project properties
- In practice, the application usually takes the ONNX model path and checks whether an engine model already exists at the corresponding path: if it does, the engine is loaded directly through TensorRT; if not, the ONNX model is first compiled into an engine model and then loaded (a minimal sketch of this flow is given after the code listing below)
- TensorRT can load a model online and serialize/save an engine that supports dynamic batch; for a reference implementation see TextandCode (a sketch of the dynamic-batch part is the second example after the code listing below)
- A very detailed ONNX basics tutorial (highly recommended): TextandCode
- Code implementation:
```cpp
#include <iostream>
#include <fstream>
#include "NvInfer.h"
#include "NvOnnxParser.h"

// Logger required by the TensorRT API: print warnings and errors only.
class Logger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        if (severity <= Severity::kWARNING)
            std::cout << msg << std::endl;
    }
} logger;

void ONNX2TensorRT(const char* ONNX_file, std::string save_ngine)
{
    // 1. Create the builder
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);

    // 2. Create a network definition in explicit-batch mode
    uint32_t flag = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(flag);

    // 3. Parse the ONNX file into the network and report any parser errors
    nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, logger);
    parser->parseFromFile(ONNX_file, static_cast<int32_t>(nvinfer1::ILogger::Severity::kWARNING));
    for (int32_t i = 0; i < parser->getNbErrors(); ++i)
    {
        std::cout << parser->getError(i)->desc() << std::endl;
    }

    // 4. Builder configuration: 16 MB workspace, FP16 if the GPU supports it
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 16 * (1 << 20));
    if (builder->platformHasFastFp16())
    {
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }

    // 5. Build and serialize the engine
    nvinfer1::IHostMemory* serializedModel = builder->buildSerializedNetwork(*network, *config);

    // 6. Write the serialized engine to disk
    std::ofstream p(save_ngine, std::ios::binary);
    p.write(reinterpret_cast<const char*>(serializedModel->data()), serializedModel->size());

    // 7. Release resources
    delete parser;
    delete network;
    delete config;
    delete builder;
    delete serializedModel;
}

void exportONNX(const char* ONNX_file, std::string save_ngine)
{
    // Check that the ONNX file exists before attempting to build
    std::ifstream file(ONNX_file, std::ios::binary);
    if (!file.good())
    {
        std::cout << "Load ONNX file failed! No file found from: " << ONNX_file << std::endl;
        return;
    }

    std::cout << "Load ONNX file from: " << ONNX_file << std::endl;
    std::cout << "Starting export ..." << std::endl;
    ONNX2TensorRT(ONNX_file, save_ngine);
    std::cout << "Export success, saved as: " << save_ngine << std::endl;
}

int main(int argc, char** argv)
{
    const char* ONNX_file = "../weights/test.onnx";
    std::string save_ngine = "../weights/test.engine";
    exportONNX(ONNX_file, save_ngine);
    return 0;
}
```
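
As noted above, a real application does not rebuild the engine every time; it checks whether a serialized engine already exists and only falls back to building from ONNX when it does not. Below is a minimal sketch of that load-or-build flow, assuming the ONNX2TensorRT() function and the global logger from the listing above are in scope; loadOrBuildEngine and the way it reads the file are illustrative, not part of the original code.

```cpp
#include <fstream>
#include <iterator>
#include <string>
#include <vector>
#include "NvInfer.h"

// Sketch: return a deserialized engine, building it from ONNX first
// if no .engine file exists yet at engine_path.
nvinfer1::ICudaEngine* loadOrBuildEngine(const std::string& onnx_path,
                                         const std::string& engine_path)
{
    // Build the engine from ONNX only when the serialized file is missing.
    if (!std::ifstream(engine_path, std::ios::binary).good())
    {
        ONNX2TensorRT(onnx_path.c_str(), engine_path);
    }

    // Read the serialized engine into memory.
    std::ifstream file(engine_path, std::ios::binary);
    std::vector<char> blob((std::istreambuf_iterator<char>(file)),
                            std::istreambuf_iterator<char>());

    // Deserialize with the TensorRT runtime. The runtime should stay alive
    // at least as long as the engine, so it is intentionally not freed here.
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
    return runtime->deserializeCudaEngine(blob.data(), blob.size());
}
```

The caller would then create an execution context from the returned engine to run inference.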
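
The listing above builds an engine with the shapes fixed by the ONNX file; to get the dynamic-batch behaviour mentioned in the notes, the builder configuration also needs an optimization profile, and the ONNX model must have been exported with a dynamic batch axis. The helper below is a hedged sketch of that step: addDynamicBatchProfile is an illustrative name, and the 3x224x224 input shape and the 1/8/16 batch range are placeholder assumptions to replace with your model's real values. It would be called inside ONNX2TensorRT() after creating the config and before buildSerializedNetwork().

```cpp
#include "NvInfer.h"

// Sketch: register a min/opt/max batch range for the network's first input
// so the built engine accepts a dynamic batch dimension.
void addDynamicBatchProfile(nvinfer1::IBuilder* builder,
                            nvinfer1::IBuilderConfig* config,
                            nvinfer1::INetworkDefinition* network)
{
    // Assumes the model's only input is network->getInput(0) with shape [-1, 3, 224, 224].
    nvinfer1::ITensor* input = network->getInput(0);
    const char* name = input->getName();

    nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
    profile->setDimensions(name, nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4{1, 3, 224, 224});
    profile->setDimensions(name, nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4{8, 3, 224, 224});
    profile->setDimensions(name, nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4{16, 3, 224, 224});
    config->addOptimizationProfile(profile);
}
```

At inference time the chosen input shape (within the min/max range) then has to be set on the execution context before enqueueing.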