TensorRT API将多个engine文件及plugin合并为一个engine文件

news/2024/11/29 22:52:06/

以下是使用TensorRT API将多个engine文件合并为一个engine文件的代码示例:

import tensorrt as trt
import numpy as np

# ---------------------------------------------------------------------------
# NOTE(review): the original snippet was not valid Python (several statements
# were fused onto single lines, so loop bodies were swallowed by comments) and
# it called APIs that do not exist in TensorRT: `Builder.deserialize_cuda_engine`
# (deserialization lives on `trt.Runtime`), `trt.Tensor`,
# `ICudaEngine.get_network`, and `IExecutionContext.get_binding_handle`.
# TensorRT provides no way to merge already-serialized engines into a single
# engine file: a serialized plan is an opaque, device/version-specific blob.
# The practical alternative — demonstrated below — is to deserialize every
# engine in one process and chain their executions, feeding each engine's
# output into the next.
# ---------------------------------------------------------------------------

# Create a TensorRT logger and a runtime used for deserialization.
logger = trt.Logger(trt.Logger.WARNING)
runtime = trt.Runtime(logger)

# Names of the overall pipeline input and output bindings.
input_names = ['input_0']
output_names = ['output_0']

# Engine files previously serialized with trt.Builder.
engine_files = ['engine_file_0.trt', 'engine_file_1.trt', 'engine_file_2.trt']

# Deserialize every engine (Runtime.deserialize_cuda_engine is the correct
# API; custom plugins must be registered/loaded before this call).
engines = []
for engine_file in engine_files:
    with open(engine_file, 'rb') as f:
        engines.append(runtime.deserialize_cuda_engine(f.read()))

# One execution context per engine.
contexts = [engine.create_execution_context() for engine in engines]

# Allocate a host buffer for every binding of every engine.
# NOTE(review): a real deployment allocates device memory (pycuda /
# cuda-python) and copies to/from it; host-only buffers keep this example
# self-contained but will not work with execute_v2 on an actual GPU without
# device allocation — confirm against your runtime setup.
host_buffers = []
for engine in engines:
    buffers = {}
    for i in range(engine.num_bindings):
        name = engine.get_binding_name(i)
        shape = tuple(engine.get_binding_shape(i))
        dtype = trt.nptype(engine.get_binding_dtype(i))
        buffers[name] = np.empty(shape, dtype=dtype)
    host_buffers.append(buffers)

# Run the engines as a pipeline: the output of engine k becomes the input of
# engine k + 1.  This is the practical alternative to "merging" engine files.
input_data = np.random.randn(1, 3, 224, 224).astype(np.float32)
current = input_data
for engine, context, buffers in zip(engines, contexts, host_buffers):
    # Assumes binding 0 is the (single) input and the last binding is the
    # output — TODO confirm for your engines via engine.binding_is_input(i).
    in_name = engine.get_binding_name(0)
    out_name = engine.get_binding_name(engine.num_bindings - 1)
    np.copyto(buffers[in_name], current.reshape(buffers[in_name].shape))
    bindings = [buffers[engine.get_binding_name(i)].ctypes.data
                for i in range(engine.num_bindings)]
    context.execute_v2(bindings=bindings)
    current = buffers[out_name]

print('pipeline output shape:', current.shape)

这个代码假设您已经将多个引擎文件序列化成二进制格式,并将它们保存在名为engine_file_0.trt,engine_file_1.trt等文件中。它会将所有引擎文件加载到一个列表中,然后根据所有引擎的绑定创建一个新引擎,并将所有数据从旧引擎复制到新引擎。最后,它会执行新引擎并验证输出。此代码旨在展示如何合并多个引擎文件,并可能需要根据您的具体情况进行修改。

您可以使用TensorRT的IHostMemory和IRuntime接口来将多个engine合并成一个engine。以下是合并两个engine的示例代码:

// Read the first serialized engine file fully into a host buffer.
// Load the first engine
std::ifstream firstEngineFile("firstEngine.trt", std::ios::in | std::ios::binary);
firstEngineFile.seekg(0, std::ios::end);
const size_t firstEngineSize = firstEngineFile.tellg();
firstEngineFile.seekg(0, std::ios::beg);
std::vector<char> firstEngineData(firstEngineSize);
firstEngineFile.read(firstEngineData.data(), firstEngineSize);
// NOTE(review): `gLogger` must be a user-defined nvinfer1::ILogger instance
// declared elsewhere; it is not provided by TensorRT.
nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
nvinfer1::ICudaEngine* firstEngine = runtime->deserializeCudaEngine(firstEngineData.data(), firstEngineSize);// Load the second engine
std::ifstream secondEngineFile("secondEngine.trt", std::ios::in | std::ios::binary);
secondEngineFile.seekg(0, std::ios::end);
const size_t secondEngineSize = secondEngineFile.tellg();
secondEngineFile.seekg(0, std::ios::beg);
std::vector<char> secondEngineData(secondEngineSize);
secondEngineFile.read(secondEngineData.data(), secondEngineSize);
nvinfer1::ICudaEngine* secondEngine = runtime->deserializeCudaEngine(secondEngineData.data(), secondEngineSize);// Create a builder for the merged engine
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
nvinfer1::INetworkDefinition* network = builder->createNetwork();// Merge the engines by copying all layers and weights from both engines to the new network
// NOTE(review): the two loops below do not compile against any public TensorRT
// release: ICudaEngine exposes no getNbLayers()/getLayer(), INetworkDefinition
// has no addLayer(const ILayer&), and ILayer has no setOutput().  A serialized
// engine is opaque — its layers and weights cannot be read back into a network
// definition.  To truly combine models, rebuild one network from the original
// sources (ONNX / network API) and build a single engine from it.
// The code is kept verbatim below for reference only.
for (int i = 0; i < firstEngine->getNbBindings(); i++)
{const char* name = firstEngine->getBindingName(i);nvinfer1::ITensor* tensor = network->addInput(name, firstEngine->getBindingDataType(i), firstEngine->getBindingDimensions(i))->getOutput(0);tensor->setDynamicRange(-127.f, 127.f);tensor->setAllowedFormats(nvinfer1::TensorFormat::kLINEAR);
}for (int i = 0; i < firstEngine->getNbLayers(); i++)
{const nvinfer1::ILayer* layer = firstEngine->getLayer(i);nvinfer1::ITensor* input = layer->getInput(0);nvinfer1::ITensor* output = layer->getOutput(0);nvinfer1::ILayer* newLayer = network->addLayer(*layer);newLayer->setInput(0, input);newLayer->setOutput(0, output);
}for (int i = 0; i < secondEngine->getNbLayers(); i++)
{const nvinfer1::ILayer* layer = secondEngine->getLayer(i);nvinfer1::ITensor* input = layer->getInput(0);nvinfer1::ITensor* output = layer->getOutput(0);nvinfer1::ILayer* newLayer = network->addLayer(*layer);newLayer->setInput(0, input);newLayer->setOutput(0, output);
}for (int i = 0; i < secondEngine->getNbBindings(); i++)
{const char* name = secondEngine->getBindingName(i);const int bindingIndex = secondEngine->getBindingIndex(name);nvinfer1::ITensor* tensor = network->addInput(name, secondEngine->getBindingDataType(bindingIndex), secondEngine->getBindingDimensions(bindingIndex))->getOutput(0);tensor->setDynamicRange(-127.f, 127.f);tensor->setAllowedFormats(nvinfer1::TensorFormat::kLINEAR);
}// Build the merged engine
nvinfer1::ICudaEngine* mergedEngine = builder->buildCudaEngine(*network);
// Serialize the built engine and write the plan bytes to disk.
nvinfer1::IHostMemory* mergedEngineMemory = mergedEngine->serialize();// Save the merged engine to disk
std::ofstream mergedEngineFile("mergedEngine.trt", std::ios::out | std::ios::binary);
mergedEngineFile.write(reinterpret_cast<const char*>(mergedEngineMemory->data()), mergedEngineMemory->size());// Cleanup
// NOTE(review): the destroy() methods are the legacy (pre-TensorRT-8) cleanup
// API — newer releases use plain `delete` — confirm the target version.
mergedEngineMemory->destroy();
builder->destroy();
network->destroy();
firstEngine->destroy();
secondEngine->destroy();
runtime->destroy();

这个例子假设第一个engine有多个输入绑定和一个输出绑定,第二个engine有一个输入绑定和多个输出绑定。代码会创建一个新的engine,该engine从两个输入绑定开始,然后遍历两个engine的所有层,将它们复制到新的engine中,最后以两个输出绑定结束。最终的mergedEngine.trt文件将包含两个engine的所有内容。

要将两个TensorRT构建的engine文件与一个插件生成的engine文件合并为一个engine文件,需要按照以下步骤进行:

  1. 加载两个TensorRT构建的engine文件,使用 nvinfer1::IRuntime 接口的 deserializeCudaEngine 函数进行加载,得到两个 nvinfer1::ICudaEngine 实例。
// Load the first engine file into memory and deserialize it.
// NOTE(review): `engineFilePath1`, `engineFilePath2` and `gLogger` are assumed
// to be declared elsewhere in the surrounding program.
std::ifstream engineFile1(engineFilePath1, std::ios::binary);
if (!engineFile1) {std::cerr << "Error opening engine file: " << engineFilePath1 << std::endl;// handle error
}
engineFile1.seekg(0, std::ifstream::end);
const size_t engineSize1 = engineFile1.tellg();
engineFile1.seekg(0, std::ifstream::beg);
std::unique_ptr<char[]> engineData1(new char[engineSize1]);
engineFile1.read(engineData1.get(), engineSize1);
nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
// NOTE(review): the three-argument deserializeCudaEngine overload (trailing
// plugin-factory pointer) is the legacy pre-TensorRT-8 signature — confirm
// the target TensorRT version.
// Load the second engine file the same way.
nvinfer1::ICudaEngine* engine1 = runtime->deserializeCudaEngine(engineData1.get(), engineSize1, nullptr);// Load the second engine file
std::ifstream engineFile2(engineFilePath2, std::ios::binary);
if (!engineFile2) {std::cerr << "Error opening engine file: " << engineFilePath2 << std::endl;// handle error
}
engineFile2.seekg(0, std::ifstream::end);
const size_t engineSize2 = engineFile2.tellg();
engineFile2.seekg(0, std::ifstream::beg);
std::unique_ptr<char[]> engineData2(new char[engineSize2]);
engineFile2.read(engineData2.get(), engineSize2);
nvinfer1::ICudaEngine* engine2 = runtime->deserializeCudaEngine(engineData2.get(), engineSize2, nullptr);
  2. 加载插件生成的engine文件,同样使用 deserializeCudaEngine 函数进行加载,得到一个 nvinfer1::ICudaEngine 实例。
// Load the plugin-generated engine file and deserialize it.
// NOTE(review): the plugin library containing the custom layer implementations
// must be loaded/registered (e.g. initLibNvInferPlugins or dlopen) BEFORE this
// deserialization, otherwise it fails — confirm in the surrounding program.
std::ifstream engineFile3(engineFilePath3, std::ios::binary);
if (!engineFile3) {std::cerr << "Error opening engine file: " << engineFilePath3 << std::endl;// handle error
}
engineFile3.seekg(0, std::ifstream::end);
const size_t engineSize3 = engineFile3.tellg();
engineFile3.seekg(0, std::ifstream::beg);
std::unique_ptr<char[]> engineData3(new char[engineSize3]);
engineFile3.read(engineData3.get(), engineSize3);
nvinfer1::ICudaEngine* engine3 = runtime->deserializeCudaEngine(engineData3.get(), engineSize3, nullptr);
  3. 创建一个新的 nvinfer1::IBuilder 实例,用于合并三个engine文件。
// Create a builder and an (implicit-batch) network definition for the merged model.
// NOTE(review): createNetwork() is the legacy implicit-batch API; TensorRT 7+
// uses createNetworkV2(flags) — confirm the target version.
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
nvinfer1::INetworkDefinition* network = builder->createNetwork();
  4. 将第一个engine的所有层添加到新的network中。
// Intended purpose: replicate engine1's bindings into the new network.
// NOTE(review): this loop does not compile against any public TensorRT
// release — ICudaEngine has no getBindingInputOrOutput(), getBindingTensor()
// or getBindingData(); ILayer has no setOutputName(); comparing a binding
// "role" against nvinfer1::TensorFormat::kLINEAR mixes unrelated enums; and
// `inputType`, `bindings` and `engine1_input_indexes` are used without being
// declared in this snippet.  Serialized engines are opaque, so their layers
// cannot be copied into an INetworkDefinition.  Kept verbatim for reference.
for (int i = 0; i < engine1->getNbBindings(); ++i) {auto bindingName = engine1->getBindingName(i);auto bindingIndex = engine1->getBindingIndex(bindingName);auto bindingSize = engine1->getBindingDimensions(bindingIndex);auto bindingType = engine1->getBindingDataType(bindingIndex);auto bindingRole = engine1->getBindingInputOrOutput(i);if (bindingRole == nvinfer1::TensorFormat::kLINEAR) {network->addInput(bindingName, bindingType, bindingSize);} else {network->markOutput(*engine1->getBindingName(i));}nvinfer1::ITensor* inputTensor = network->getInput(i);nvinfer1::ITensor* outputTensor = engine1->getBindingIndex(bindingName) < engine1->getNbBindings() - 1 ?engine1->getBindingTensor(bindingName) : nullptr;if (outputTensor) {nvinfer1::ILayer* layer = network->addIdentity(*(outputTensor));layer->setName(bindingName);layer->setOutputName(0, bindingName);inputTensor->setName(bindingName);inputTensor->setType(inputType);network->markOutput(*inputTensor);}bindings[i] = nullptr;if (engine1->bindingIsInput(i)) {bindings[i] = network->addInput(bindingName, bindingType, bindingSize);} else {bindings[i] = network->addConstant(bindingSize, engine1->getBindingData(bindingIndex));}engine1_input_indexes.emplace_back(i);
}
  5. 将第二个engine的所有层添加到新的network中。
// Intended purpose: replicate engine2's bindings into the new network.
// NOTE(review): same non-compiling constructs as the engine1 loop
// (getBindingInputOrOutput/getBindingTensor/getBindingData/setOutputName do
// not exist), plus additional undeclared names: `engine2_input_indexes`,
// `input_type`, `input_dims`, `input_tensor`, `inputType`, `bindings`.
// Kept verbatim for reference only.
for (int i = 0; i < engine2->getNbBindings(); ++i) {auto bindingName = engine2->getBindingName(i);auto bindingIndex = engine2->getBindingIndex(bindingName);auto bindingSize = engine2->getBindingDimensions(bindingIndex);auto bindingType = engine2->getBindingDataType(bindingIndex);auto bindingRole = engine2->getBindingInputOrOutput(i);if (bindingRole == nvinfer1::TensorFormat::kLINEAR) {if (engine1_input_indexes.empty() && engine2_input_indexes.empty()) {network->addInput(bindingName, bindingType, bindingSize);} else {input_type = bindingType;input_dims = bindingSize;nvinfer1::ITensor* inputTensor = network->addInput(bindingName, bindingType, bindingSize);auto index = engine2->getBindingIndex(bindingName);auto tensor = engine2->getBindingTensor(bindingName);assert(tensor != nullptr && "Failed to locate tensor in engine");inputTensor->setDynamicRange(-1.f, 1.f);bindings[index] = inputTensor;input_tensor = inputTensor;}} else {network->markOutput(*engine2->getBindingName(i));}nvinfer1::ITensor* inputTensor = input_tensor;nvinfer1::ITensor* outputTensor = engine2->getBindingIndex(bindingName) < engine2->getNbBindings() - 1 ?engine2->getBindingTensor(bindingName) : nullptr;if (outputTensor) {nvinfer1::ILayer* layer = network->addIdentity(*(outputTensor));layer->setName(bindingName);layer->setOutputName(0, bindingName);inputTensor->setName(bindingName);inputTensor->setType(inputType);network->markOutput(*inputTensor);}bindings[i] = nullptr;if (engine2->bindingIsInput(i)) {bindings[i] = network->addInput(bindingName, bindingType, bindingSize);} else {bindings[i] = network->addConstant(bindingSize, engine2->getBindingData(bindingIndex));}engine2_input_indexes.emplace_back(i);
}
  6. 将插件生成的engine的所有层添加到新的network中。
// Intended purpose: replicate the plugin engine's bindings into the new network.
// NOTE(review): identical non-compiling pattern as the previous two loops
// (nonexistent ICudaEngine/ILayer members, undeclared `engine3_input_indexes`,
// `input_type`, `input_dims`, `input_tensor`, `inputType`, `bindings`).
// Kept verbatim for reference only.
for (int i = 0; i < engine3->getNbBindings(); ++i) {auto bindingName = engine3->getBindingName(i);auto bindingIndex = engine3->getBindingIndex(bindingName);auto bindingSize = engine3->getBindingDimensions(bindingIndex);auto bindingType = engine3->getBindingDataType(bindingIndex);auto bindingRole = engine3->getBindingInputOrOutput(i);if (bindingRole == nvinfer1::TensorFormat::kLINEAR) {if (engine1_input_indexes.empty() && engine2_input_indexes.empty() && engine3_input_indexes.empty()) {network->addInput(bindingName, bindingType, bindingSize);} else {input_type = bindingType;input_dims = bindingSize;nvinfer1::ITensor* inputTensor = network->addInput(bindingName, bindingType, bindingSize);auto index = engine3->getBindingIndex(bindingName);auto tensor = engine3->getBindingTensor(bindingName);assert(tensor != nullptr && "Failed to locate tensor in engine");inputTensor->setDynamicRange(-1.f, 1.f);bindings[index] = inputTensor;input_tensor = inputTensor;}} else {network->markOutput(*engine3->getBindingName(i));}nvinfer1::ITensor* inputTensor = input_tensor;nvinfer1::ITensor* outputTensor = engine3->getBindingIndex(bindingName) < engine3->getNbBindings() - 1 ?engine3->getBindingTensor(bindingName) : nullptr;if (outputTensor) {nvinfer1::ILayer* layer = network->addIdentity(*(outputTensor));layer->setName(bindingName);layer->setOutputName(0, bindingName);inputTensor->setName(bindingName);inputTensor->setType(inputType);network->markOutput(*inputTensor);}bindings[i] = nullptr;if (engine3->bindingIsInput(i)) {bindings[i] = network->addInput(bindingName, bindingType, bindingSize);} else {bindings[i] = network->addConstant(bindingSize, engine3->getBindingData(bindingIndex));}engine3_input_indexes.emplace_back(i);
}
  7. 将新的 nvinfer1::INetworkDefinition 对象和设备类型传递给 nvinfer1::IBuilder 对象,使用 buildCudaEngine 函数生成合并后的engine文件。
// Configure precision, batch size and workspace, then build the engine.
// NOTE(review): `batchSize` is not declared in this snippet, and
// setMaxBatchSize/setMaxWorkspaceSize/setFp16Mode/setInt8Mode/buildCudaEngine
// are the legacy pre-TensorRT-8 builder APIs (replaced by IBuilderConfig) —
// confirm the target TensorRT version.
std::string mode = "fp32";
builder->setMaxBatchSize(batchSize);
builder->setMaxWorkspaceSize(1 << 30);
builder->setFp16Mode(mode == "fp16");
builder->setInt8Mode(mode == "int8");nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
  8. 将合并后的 nvinfer1::ICudaEngine 对象序列化到文件中,得到最终的engine文件。
// Serialize the built engine and write the plan bytes to `engineFilePath`.
// NOTE(review): because the whole statement is fused onto one line, the inline
// "// handle error" comment comments out everything after it — the closing
// brace, the engineFile.write(...) call and serialized->destroy() are dead
// text, leaving the braces unbalanced.  Reformat onto separate lines to make
// this compile as intended.
if (engine) {nvinfer1::IHostMemory* serialized = engine->serialize();std::ofstream engineFile(engineFilePath, std::ios::binary);if (!engineFile) {std::cerr << "Error opening engine file: " << engineFilePath << std::endl;// handle error}engineFile.write(reinterpret_cast<const char*>(serialized->data()), serialized->size());serialized->destroy();
}

完整代码示例:

// "Complete" example: Logger class plus main() loading three engines.
// NOTE(review): this listing is damaged by the scrape and cannot compile as-is:
//  - several `#include` directives and the `class Logger` declaration are
//    fused onto single lines (a preprocessor directive must be on its own line);
//  - main()'s body is collapsed onto two giant lines, so each inline `//`
//    comment comments out all code that follows it on the same line (the
//    Chinese comments read "load the first / second / plugin-generated engine
//    file" and "add all layers of engine 1 to the new network");
//  - the loop bodies repeat the non-compiling binding-copy pattern flagged on
//    the step-4/5/6 snippets above (nonexistent ICudaEngine/ILayer members);
//  - the listing is truncated mid-sentence at the end ("将第二个engine的所有"),
//    so main() is never closed.
// Kept verbatim for reference only.
#include <iostream>
#include <fstream>
#include <memory>
#include <vector>#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "NvInferRuntimeCommon.h"class Logger : public nvinfer1::ILogger {
public:void log(nvinfer1::ILogger::Severity severity, const char* msg) override {if (severity != Severity::kINFO) {std::cout << msg << std::endl;}}
} gLogger;int main() {const std::string engineFilePath1 = "/path/to/first/engine";const std::string engineFilePath2 = "/path/to/second/engine";const std::string engineFilePath3 = "/path/to/third/engine";const std::string engineFilePath = "/path/to/merged/engine";const int batchSize = 1;nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);nvinfer1::INetworkDefinition* network = builder->createNetwork();std::vector<void*> bindings(3 * batchSize);// 加载第一个engine文件std::ifstream engineFile1(engineFilePath1, std::ios::binary);if (!engineFile1) {std::cerr << "Error opening engine file: " << engineFilePath1 << std::endl;return 1;}engineFile1.seekg(0, std::ifstream::end);const size_t engineSize1 = engineFile1.tellg();engineFile1.seekg(0, std::ifstream::beg);std::unique_ptr<char[]> engineData1(new char[engineSize1]);engineFile1.read(engineData1.get(), engineSize1);nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);nvinfer1::ICudaEngine* engine1 = runtime->deserializeCudaEngine(engineData1.get(), engineSize1, nullptr);std::vector<int> engine1_input_indexes;// 加载第二个engine文件std::ifstream engineFile2(engineFilePath2, std::ios::binary);if (!engineFile2) {std::cerr << "Error opening engine file: " << engineFilePath2 << std::endl;return 1;}engineFile2.seekg(0, std::ifstream::end);const size_t engineSize2 = engineFile2.tellg();engineFile2.seekg(0, std::ifstream::beg);std::unique_ptr<char[]> engineData2(new char[engineSize2]);engineFile2.read(engineData2.get(), engineSize2);nvinfer1::ICudaEngine* engine2 = runtime->deserializeCudaEngine(engineData2.get(), engineSize2, nullptr);std::vector<int> engine2_input_indexes;// 加载插件生成的engine文件std::ifstream engineFile3(engineFilePath3, std::ios::binary);if (!engineFile3) {std::cerr << "Error opening engine file: " << engineFilePath3 << std::endl;return 1;}engineFile3.seekg(0, std::ifstream::end);const size_t engineSize3 = engineFile3.tellg();engineFile3.seekg(0, std::ifstream::beg);std::unique_ptr<char[]> 
engineData3(new char[engineSize3]);engineFile3.read(engineData3.get(), engineSize3);nvinfer1::ICudaEngine* engine3 = runtime->deserializeCudaEngine(engineData3.get(), engineSize3, nullptr);std::vector<int> engine3_input_indexes;// 将第一个engine的所有层添加到新的network中for (int i = 0; i < engine1->getNbBindings(); ++i) {auto bindingName = engine1->getBindingName(i);auto bindingIndex = engine1->getBindingIndex(bindingName);auto bindingSize = engine1->getBindingDimensions(bindingIndex);auto bindingType = engine1->getBindingDataType(bindingIndex);auto bindingRole = engine1->getBindingInputOrOutput(i);if (bindingRole == nvinfer1::TensorFormat::kLINEAR) {network->addInput(bindingName, bindingType, bindingSize);} else {network->markOutput(*engine1->getBindingName(i));}nvinfer1::ITensor* inputTensor = network->getInput(i);nvinfer1::ITensor* outputTensor = engine1->getBindingIndex(bindingName) < engine1->getNbBindings() - 1 ?engine1->getBindingTensor(bindingName) : nullptr;if (outputTensor) {nvinfer1::ILayer* layer = network->addIdentity(*(outputTensor));layer->setName(bindingName);layer->setOutputName(0, bindingName);inputTensor->setName(bindingName);inputTensor->setType(inputType);network->markOutput(*inputTensor);}bindings[i] = nullptr;if (engine1->bindingIsInput(i)) {bindings[i] = network->addInput(bindingName, bindingType, bindingSize);} else {bindings[i] = network->addConstant(bindingSize, engine1->getBindingData(bindingIndex));}engine1_input_indexes.emplace_back(i);}// 将第二个engine的所有

可以通过以下步骤将两个tensorrt构建的engine文件与一个plugin生成的engine文件合并为一个engine文件:

  1. 使用TensorRT API加载两个engine文件和一个带有自定义plugin代码的插件库,创建两个Execution contexts。

  2. 使用TensorRT API获取两个engine文件的输入和输出张量名称,并创建一个新的engine文件。

  3. 使用TensorRT API将第一个engine文件的输入和输出张量添加到新的engine文件中。

  4. 使用TensorRT API将第二个engine文件的输入和输出张量添加到新的engine文件中。

  5. 使用TensorRT API将自定义插件代码添加到新的engine文件中。

  6. 使用TensorRT API编译并序列化新的engine文件。

以下是代码示例:

// Third example: combine two engines plus a custom plugin into one file.
// NOTE(review): this listing also cannot compile as published:
//  - there is no "NvPlugin.h" header in TensorRT, and NvOnnxParser.h is
//    included but never used;
//  - `gLogger`, `maxBatchSize` and `maxWorkspaceSize` are used undeclared;
//  - INetworkDefinition has no addOutput() (outputs are declared with
//    markOutput()), and `PluginFactory` with a
//    createPlugin(name, ITensor*, ITensor*, int) signature is not a TensorRT
//    API — addPluginV2 takes (ITensor* const*, int, IPluginV2&);
//  - a network that only declares inputs/outputs contains no layers, so
//    buildCudaEngine would fail even with valid calls;
//  - the code is collapsed onto giant fused lines, kept verbatim below.
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "NvPlugin.h"using namespace nvinfer1;int main() {// Load the first engine fileIRuntime* runtime1 = createInferRuntime(gLogger);std::ifstream file1("engine1.engine", std::ios::binary);file1.seekg(0, std::ios::end);const int modelSize1 = file1.tellg();file1.seekg(0, std::ios::beg);std::unique_ptr<char[]> modelData1(new char[modelSize1]);file1.read(modelData1.get(), modelSize1);file1.close();ICudaEngine* engine1 = runtime1->deserializeCudaEngine(modelData1.get(), modelSize1, nullptr);// Load the second engine fileIRuntime* runtime2 = createInferRuntime(gLogger);std::ifstream file2("engine2.engine", std::ios::binary);file2.seekg(0, std::ios::end);const int modelSize2 = file2.tellg();file2.seekg(0, std::ios::beg);std::unique_ptr<char[]> modelData2(new char[modelSize2]);file2.read(modelData2.get(), modelSize2);file2.close();ICudaEngine* engine2 = runtime2->deserializeCudaEngine(modelData2.get(), modelSize2, nullptr);// Create a new engine with the inputs and outputs from both enginesIBuilder* builder = createInferBuilder(gLogger);INetworkDefinition* network = builder->createNetwork();// Add the inputs and outputs from the first engine to the new enginefor (int i = 0; i < engine1->getNbBindings(); i++) {std::string name = engine1->getBindingName(i);Dims dims = engine1->getBindingDimensions(i);DataType type = engine1->getBindingDataType(i);bool isInput = engine1->bindingIsInput(i);if (isInput) {network->addInput(name.c_str(), type, dims);} else {network->addOutput(name.c_str(), type, dims);}}// Add the inputs and outputs from the second engine to the new enginefor (int i = 0; i < engine2->getNbBindings(); i++) {std::string name = engine2->getBindingName(i);Dims dims = engine2->getBindingDimensions(i);DataType type = engine2->getBindingDataType(i);bool isInput = engine2->bindingIsInput(i);if (isInput) {network->addInput(name.c_str(), type, dims);} else {network->addOutput(name.c_str(), type, dims);}}// Add the custom plugin to the new enginePluginFactory 
pluginFactory;ITensor* inputTensor = network->getInput(0);ITensor* outputTensor = network->getOutput(0);IPluginV2Layer* customLayer = pluginFactory.createPlugin("customPlugin", inputTensor, outputTensor, 1);network->addPluginV2(&inputTensor, 1, customLayer);// Build and serialize the new enginebuilder->setMaxBatchSize(maxBatchSize);builder->setMaxWorkspaceSize(maxWorkspaceSize);ICudaEngine* newEngine = builder->buildCudaEngine(*network);IHostMemory* serializedEngine = newEngine->serialize();std::ofstream file("combined.engine", std::ios::binary);file.write((char*) serializedEngine->data(), serializedEngine->size());file.close();// Cleanupbuilder->destroy();network->destroy();newEngine->destroy();serializedEngine->destroy();engine1->destroy();engine2->destroy();runtime1->destroy();runtime2->destroy();return 0;
}

在代码示例中,我们使用两个IRuntime实例加载两个engine文件,获取其中的输入和输出张量,并创建一个新的engine文件。然后,我们使用IBuilder创建一个新的网络,并将两个engine文件的输入和输出张量添加到新的网络中。最后,我们使用PluginFactory创建一个自定义插件,并将其添加到新的网络中。使用IBuilder编译新的网络,并使用ICudaEngine序列化并保存到文件。最后,我们清理创建的资源。


http://www.ppmy.cn/news/95939.html

相关文章

内向的软件开发工程师如何在职场站稳阵脚?

本文框架 1.前言2. 几点个人心得2.1 要有自己拿得出手的模块2.2 善于整理归纳2.3 经常展示自己2.4 方法与努力 1.前言 最近跟一个博客上认识的朋友一起聊天&#xff0c;他基本情况是从其他监控设备行业转行到汽车电子做软件开发工程师一年左右&#xff0c;总感觉在团队中找不到…

JS笔记--Web APIS(下)

# Web APIs - 第5天笔记 ## 定时器 -延迟函数 JavaScript 内置的一个用来让代码延迟执行的函数&#xff0c;叫 setTimeout 语法&#xff1a; ~~~JavaScript setTimeout(回调函数, 延迟时间) ~~~ setTimeout 仅仅只执行一次&#xff0c;所以可以理解为就是把一段代码延迟…

http请求和响应(包含状态码)+过滤器

目录 一、http协议概述 二、http请求 三、http响应 四、过滤器 一、http协议概述 1.http&#xff1a;超文本传输协议&#xff0c;是用于在网络上传输数据的应用层协议。是互联网上应用最为流行的一种网络协议,用于定义客户端浏览器和服务器之间交换数据的过程&#xff0c;基…

单元测试(unit testing)到底是什么?

引言 做开发的同学应该都听说过单元测试&#xff08;unit testing&#xff09;&#xff0c;因为对单元测试的理解程度不同&#xff0c;所以对单元测试的看法也可能有所不同。本文就来深入讲解一下单元测试的概念、作用和是否需要做单元测试。 什么是单元测试&#xff08;unit…

【循环自相关和循环谱系列7】OFDM循环自相关推导分析、时间参数估计原理仿真及某无人机实际图传信号验证(含矩形/非矩形、有无循环前缀等情况)

重要声明:为防止爬虫和盗版贩卖,文章中的核心代码可凭【CSDN订阅截图或公z号付费截图】私信免费领取,一律不认其他渠道付费截图! 说明:本博客含大量公式推导分析,比较烧脑,需要有一定的数学基础,高等数学、信号与系统等! 这是循环自相关和循环谱系列的第七篇文章了…

详解c++STL—STL常用算法

目录 1、常用遍历算法 1.1、for_each 1.2、transform 2、常用查找算法 2.1、find 2.2、find_if 2.3、adjacent_find 2.4、binary_search 2.5、count 2.6、count_if 3、常用排序算法 3.1、sort 3.2、random_shuffle 3.3、merge 3.4、reverse 4、常用拷贝和替换算…

MySQL高频面试题

什么是DDL、DML、DQL DDL&#xff08;数据定义语言&#xff09;&#xff0c;用来定义&#xff08;创建删除修改&#xff09;数据库对象&#xff08;数据库、表、字段&#xff09; DML&#xff08;数据操纵语言&#xff09;&#xff0c;用来对数据库表中的数据进行增删查改&am…

ssm+springboot+java高校图书馆图书借阅座位预约管理系统系统

陕理工图书馆管理系统包括多个功能模块&#xff1a;图书类别管理模块、图书管理模块、读者管理模块、借阅管理模块、预约管理、推荐管理。管理员登入后&#xff0c;维护图书借阅的信息。本文介绍了使用Java技术开发陕理工图书馆管理系统的设计与实现过程&#xff0c;首先对实现…