Hi Tom,
Thank you for your answer; the typo was just a pasting error.
Yes, I write the engine file in binary mode as well.
My model has a custom layer, and I can see that the NVIDIA sample uses a PluginFactory.
When I call deserializeCudaEngine on the in-memory stream, the code works, but when I deserialize from the file on disk it crashes with a core dump.
From the gdb trace I can't see where the crash comes from.
// deserializeCudaEngine
IRuntime* runtime = createInferRuntime(gSSHLogger.getTRTLogger());
assert(runtime != nullptr);
PluginFactory pluginFactory;
ICudaEngine* engine = nullptr;
if (trtModelStream != nullptr)
{
    // Deserialize from the in-memory stream (this path works)
    engine = runtime->deserializeCudaEngine(
        trtModelStream->data(), trtModelStream->size(), &pluginFactory);
}
else
{
    // Deserialize from the cached engine file on disk (this path core dumps)
    gLogInfo << "read gie model 1..." << std::endl;
    std::ifstream engineFile(cache_path, std::ifstream::binary);
    assert(engineFile.good()); // a wrong cache_path would otherwise fail silently
    engineFile.seekg(0, engineFile.end);
    long int fsize = engineFile.tellg();
    engineFile.seekg(0, engineFile.beg);
    assert(fsize > 0); // tellg() returns -1 if the open or seek failed
    std::vector<char> engineData(fsize);
    engineFile.read(engineData.data(), fsize); // data() is already char*, no cast needed
    assert(engineFile.good()); // make sure the whole file was read
    gLogInfo << "read gie model 9..." << std::endl;
    engine = runtime->deserializeCudaEngine(engineData.data(), fsize, &pluginFactory);
    gLogInfo << "read gie model 2..." << std::endl;
}
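To rule out corruption between serialization and the cached file, I am also thinking of comparing the two byte-for-byte in a run where both the in-memory stream and the file exist (a quick sketch reusing trtModelStream and cache_path from above; needs <cstring> and <iterator>):

// verify the cached file holds exactly the bytes that were serialized
std::ifstream checkFile(cache_path, std::ifstream::binary);
std::vector<char> fileData((std::istreambuf_iterator<char>(checkFile)),
                           std::istreambuf_iterator<char>());
assert(fileData.size() == trtModelStream->size());
assert(std::memcmp(fileData.data(), trtModelStream->data(), fileData.size()) == 0);
gLogInfo << "cached file matches the serialized stream" << std::endl;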
// serialize
PluginFactory parserPluginFactory;
caffeToTRTModel(
    "test_ssh.prototxt",
    "SSH.caffemodel",
    std::vector<std::string>{OUTPUT_BLOB_NAME0, OUTPUT_BLOB_NAME1},
    N, &parserPluginFactory, trtModelStream);
parserPluginFactory.destroyPlugin();
assert(trtModelStream != nullptr);
saveGIEModel(trtModelStream, &cache_path);
void caffeToTRTModel(const std::string& deployFile,             // name of the Caffe prototxt
                     const std::string& modelFile,              // name of the caffemodel
                     const std::vector<std::string>& outputs,   // network outputs
                     unsigned int maxBatchSize,                 // batch size - NB must be at least as large as the batch we want to run with
                     nvcaffeparser1::IPluginFactoryExt* pluginFactory, // factory for plugin layers
                     IHostMemory*& trtModelStream)              // output stream for the TensorRT model
{
    // Create the builder
    IBuilder* builder = createInferBuilder(gSSHLogger.getTRTLogger());
    assert(builder != nullptr);

    // Parse the Caffe model to populate the network, then set the outputs
    INetworkDefinition* network = builder->createNetwork();
    ICaffeParser* parser = createCaffeParser();
    parser->setPluginFactoryExt(pluginFactory);

    bool fp16 = builder->platformHasFastFp16();
    const IBlobNameToTensor* blobNameToTensor = parser->parse(locateMyFile(deployFile).c_str(),
                                                              locateMyFile(modelFile).c_str(),
                                                              *network,
                                                              fp16 ? DataType::kHALF : DataType::kFLOAT);
    assert(blobNameToTensor != nullptr); // a parse failure returns nullptr
    gLogInfo << "support fp16: " << fp16 << std::endl;

    // Specify which tensors are outputs
    for (auto& s : outputs)
    {
        ITensor* tensor = blobNameToTensor->find(s.c_str());
        assert(tensor != nullptr); // an unknown blob name would crash markOutput
        network->markOutput(*tensor);
    }

    // Build the engine
    builder->setMaxBatchSize(maxBatchSize);
    builder->setMaxWorkspaceSize(10 << 20); // we need about 6MB of scratch space for the plugin layer at batch size 5
    builder->setFp16Mode(fp16);
    gLogInfo << "Begin building engine..." << std::endl;
    ICudaEngine* engine = builder->buildCudaEngine(*network);
    assert(engine);
    gLogInfo << "End building engine..." << std::endl;

    // We don't need the network any more, and we can destroy the parser
    network->destroy();
    parser->destroy();

    // Serialize the engine, then close everything down
    trtModelStream = engine->serialize();
    engine->destroy();
    builder->destroy();
    shutdownProtobufLibrary(); // note: the Caffe parser cannot be used again in this process after this call
}
void saveGIEModel(IHostMemory*& trtModelStream, std::string* cache_path)
{
    std::ofstream ofs(*cache_path, std::ofstream::binary);
    assert(ofs.is_open()); // an unwritable path would otherwise fail silently
    ofs.write(static_cast<const char*>(trtModelStream->data()), trtModelStream->size());
    assert(ofs.good()); // verify the whole engine was written
    ofs.close();
}
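As a sanity check, right after saving I can also try deserializing from the freshly written file in the same process. If that works but a later run still crashes, the file I was loading was probably stale, e.g. written by an older build of the plugin. A sketch reusing the pieces above:

// round-trip check: serialize, write, re-read, deserialize in one process
saveGIEModel(trtModelStream, &cache_path);
IRuntime* checkRuntime = createInferRuntime(gSSHLogger.getTRTLogger());
PluginFactory checkFactory;
std::ifstream in(cache_path, std::ifstream::binary);
std::vector<char> blob((std::istreambuf_iterator<char>(in)),
                       std::istreambuf_iterator<char>());
ICudaEngine* checkEngine = checkRuntime->deserializeCudaEngine(
    blob.data(), blob.size(), &checkFactory);
assert(checkEngine != nullptr);
checkEngine->destroy();
checkRuntime->destroy();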
The gdb trace:
[New Thread 0x7fffcf75e700 (LWP 32186)]
[New Thread 0x7fffcef5d700 (LWP 32187)]
[New Thread 0x7fffce6db700 (LWP 32195)]
[I] build engine 1...
[I] read gie model 1...
[I] read gie model 9...
Thread 1 "sample_SSH" received signal SIGSEGV, Segmentation fault.
0x0000555555563998 in std::vector<float, std::allocator<float> >::size() const ()
(gdb)
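Since the backtrace ends in std::vector<float>::size(), my current guess is that the crash is inside my plugin's deserialization constructor: some vector member stays uninitialized or gets a garbage size because the constructor misreads the serialization buffer. Next run I will also capture the full backtrace with bt. To confirm, I plan to add a bounds check to the read helper from the TensorRT samples (a sketch; the constructor body below is a placeholder, not my real plugin):

// bounds-checked version of the samples' read helper
template <typename T>
T read(const char*& buffer, const char* end)
{
    assert(buffer + sizeof(T) <= end); // trips on any read past the serialized blob
    T val;
    std::memcpy(&val, buffer, sizeof(T)); // memcpy also avoids unaligned loads
    buffer += sizeof(T);
    return val;
}

// used from the plugin's deserialization constructor, e.g.:
// SSHPlugin::SSHPlugin(const void* data, size_t length)
// {
//     const char* d = static_cast<const char*>(data);
//     const char* end = d + length;
//     int count = read<int>(d, end);        // placeholder field
//     mWeights.resize(count);               // the vector the trace points at?
//     for (int i = 0; i < count; ++i)
//         mWeights[i] = read<float>(d, end);
//     assert(d == end); // every byte of the blob must be consumed
// }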
Best regards,
Amy