Description
I am trying to convert a Caffe model (res10_300x300_ssd_iter_140000.caffemodel) to TensorRT, but ICaffeParser* parser = createCaffeParser(); fails to parse the following Normalize layer:
layer {
  name: "conv4_3_norm"
  type: "Normalize"
  bottom: "layer_256_1_bn1"
  top: "conv4_3_norm"
  norm_param {
    across_spatial: false
    scale_filler {
      type: "constant"
      value: 20
    }
    channel_shared: false
  }
}
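From the TensorRT documentation, my understanding is that the Caffe parser can substitute registered plugins for SSD layers such as Normalize (the Normalize_TRT plugin that ships in libnvinfer_plugin), as long as the plugin library is initialized before parsing. Is that the intended way to handle this layer? Below is a minimal sketch of what I believe the registration would look like, using the gLogger from the sample code further down (initLibNvInferPlugins is declared in NvInferPlugin.h and requires linking against nvinfer_plugin; I have not verified this on the TX2):

#include "NvInferPlugin.h" // initLibNvInferPlugins; link with -lnvinfer_plugin

// Register TensorRT's built-in plugins (Normalize_TRT, PriorBox_TRT, NMS_TRT, ...)
// with the global plugin registry so the Caffe parser can map otherwise
// unsupported layer types onto them. Call this before parser->parse().
if (!initLibNvInferPlugins(&gLogger, ""))
{
    std::cout << "initLibNvInferPlugins failed" << std::endl;
}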
Environment
TensorRT Version: 8.0.1
GPU Type: NVIDIA Tegra X2 (nvgpu)/integrated (Jetson TX2)
Nvidia Driver Version:
CUDA Version: 10.2
CUDNN Version:
Operating System + Version:
Python Version (if applicable):
TensorFlow Version (if applicable):
PyTorch Version (if applicable):
Baremetal or Container (if container which image + tag):
Relevant Files
Model and deploy file link: https://github.com/gopinath-balu/computer_vision/tree/master/CAFFE_DNN
Sample Code:
#include <assert.h>
#include <sys/stat.h>
#include <time.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <cmath>
#include <algorithm>
#include <vector>
#include <memory>
#include <cuda_runtime_api.h>
#include <NvInferRuntime.h>
#include "NvCaffeParser.h"
#include "NvInfer.h"
using namespace nvinfer1;
using namespace nvcaffeparser1;
using namespace std;
// Attributes of ResNet10 SSD Caffe model
static const int INPUT_H = 300;
static const int INPUT_W = 300;
static const int OUTPUT_SIZE = 2;
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "detection_out"; //"prob"
const std::string data_dir = "/home/jetsonhack/tensorRT/caffeParserCPP/data/";
// Logger for GIE info/warning/errors
class CLogger : public nvinfer1::ILogger
{
public:
    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
    {
        // Suppress info-level messages
        if (severity != nvinfer1::ILogger::Severity::kINFO)
        {
            std::cout << msg << std::endl;
        }
    }
} gLogger;
void caffeToTRTModel(const std::string& deployFilepath,       // Path of Caffe prototxt file
                     const std::string& modelFilepath,        // Path of Caffe model file
                     const std::vector<std::string>& outputs, // Names of network outputs
                     unsigned int maxBatchSize,               // Note: Must be at least as large as the batch we want to run with
                     IHostMemory*& trtModelStream)            // Output buffer for the serialized TRT model
{
    // Create builder
    IBuilder* builder = createInferBuilder(gLogger);
    // Parse the Caffe model to populate the network, then mark the outputs
    std::cout << "Reading Caffe prototxt: " << deployFilepath << "\n";
    std::cout << "Reading Caffe model: " << modelFilepath << "\n";
    INetworkDefinition* network = builder->createNetworkV2(0); // implicit-batch network
    ICaffeParser* parser = createCaffeParser();
    bool useFp16 = builder->platformHasFastFp16();
    std::cout << "platformHasFastFp16: " << useFp16 << "\n";
    bool useInt8 = builder->platformHasFastInt8();
    std::cout << "platformHasFastInt8: " << useInt8 << "\n";
    // Import the weights as FP16 if it is natively supported
    nvinfer1::DataType modelDataType = useFp16 ? DataType::kHALF : DataType::kFLOAT;
    const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(deployFilepath.c_str(),
                                                                              modelFilepath.c_str(),
                                                                              *network,
                                                                              modelDataType);
    assert(blobNameToTensor != nullptr && "Caffe parsing failed");
    // Specify output tensors of the network (TensorRT requires at least one output).
    // find() returns nullptr for blobs the parser did not create, e.g. when a
    // layer failed to parse, so guard against dereferencing it blindly.
    for (auto& s : outputs)
    {
        std::cout << "output = " << s << std::endl;
        ITensor* tensor = blobNameToTensor->find(s.c_str());
        assert(tensor != nullptr && "output blob not found");
        network->markOutput(*tensor);
    }
    builder->setMaxBatchSize(maxBatchSize);
    // Build engine
    nvinfer1::IBuilderConfig* builderConfig = builder->createBuilderConfig();
    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *builderConfig);
    assert(engine);
    // Destroy parser, network, and builder config
    network->destroy();
    parser->destroy();
    builderConfig->destroy();
    // Serialize engine and destroy it
    trtModelStream = engine->serialize();
    engine->destroy();
    builder->destroy();
    shutdownProtobufLibrary();
}
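// NOTE: To see how far the parser actually got, one option is to dump every
// layer that parse() managed to register, by calling a helper like this
// (a hypothetical, untested diagnostic using the standard INetworkDefinition
// API) right after parser->parse() inside caffeToTRTModel:
static void dumpParsedLayers(const nvinfer1::INetworkDefinition& network)
{
    for (int i = 0; i < network.getNbLayers(); ++i)
    {
        std::cout << "layer " << i << ": " << network.getLayer(i)->getName() << std::endl;
    }
}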
void SaveEngine(const nvinfer1::IHostMemory& trtModelStream, const std::string& engine_filepath)
{
    std::ofstream file;
    file.open(engine_filepath, std::ios::binary | std::ios::out);
    if (!file.is_open())
    {
        std::cout << "failed to open engine file " << engine_filepath << " for writing" << std::endl;
        return;
    }
    file.write(static_cast<const char*>(trtModelStream.data()), trtModelStream.size());
    file.close();
}
ICudaEngine* LoadEngine(IRuntime& runtime, const std::string& engine_filepath)
{
    std::ifstream file;
    file.open(engine_filepath, ios::binary | ios::in);
    if (!file.is_open())
    {
        std::cout << "failed to open engine file " << engine_filepath << " for reading" << std::endl;
        return nullptr;
    }
    file.seekg(0, ios::end);
    int length = file.tellg();
    file.seekg(0, ios::beg);
    std::shared_ptr<char> data(new char[length], std::default_delete<char[]>());
    file.read(data.get(), length);
    file.close();
    ICudaEngine* engine = runtime.deserializeCudaEngine(data.get(), length, nullptr);
    assert(engine != nullptr);
    return engine;
}
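// Sketch (untested, since the build currently fails) of how I plan to use
// LoadEngine once the engine is built: deserialize with a runtime, then
// create an execution context for inference. demo_load_trt_engine is a
// hypothetical helper, not part of the original flow.
void demo_load_trt_engine(const std::string& engine_filepath)
{
    IRuntime* runtime = createInferRuntime(gLogger);
    ICudaEngine* engine = LoadEngine(*runtime, engine_filepath);
    if (engine != nullptr)
    {
        IExecutionContext* context = engine->createExecutionContext();
        // ... copy inputs to device, context->execute(batchSize, bindings),
        //     copy outputs back ...
        context->destroy();
        engine->destroy();
    }
    runtime->destroy();
}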
void demo_save_caffe_to_trt(const std::string& engine_filepath)
{
    std::string deploy_filepath = data_dir + "deploy.prototxt";
    std::string model_filepath = data_dir + "res10_300x300_ssd_iter_140000.caffemodel";
    // Create TRT model from the Caffe model and serialize it to a stream
    IHostMemory* trtModelStream{nullptr};
    caffeToTRTModel(deploy_filepath, model_filepath, std::vector<std::string>{OUTPUT_BLOB_NAME}, 1, trtModelStream);
    assert(trtModelStream != nullptr);
    SaveEngine(*trtModelStream, engine_filepath);
    // Destroy stream
    trtModelStream->destroy();
}
int trTdemo()
{
    bool use_caffe = true;
    std::string engine_filepath;
    if (use_caffe)
    {
        engine_filepath = "/home/jetsonhack/tensorRT/caffeParserCPP/data/caffe_ssd_useInt8.trt";
        demo_save_caffe_to_trt(engine_filepath);
    }
    else
    {
        std::cout << "ONNX model not available" << std::endl;
    }
    std::cout << "[API] Save engine to " << engine_filepath << std::endl;
    return 0;
}
int main(int argc, char** argv)
{
    trTdemo();
    return 0;
}
Output:
jetsonhack@jetsonhack-desktop:~/tensorRT/caffeParserCPP/build$ ./nvidia_example
Reading Caffe prototxt: /home/jetsonhack/tensorRT/caffeParserCPP/data/deploy.prototxt
Reading Caffe model: /home/jetsonhack/tensorRT/caffeParserCPP/data/res10_300x300_ssd_iter_140000.caffemodel
platformHasFastFp16: 1
platformHasFastInt8: 0
could not parse layer type Normalize
output = detection_out
Segmentation fault (core dumped)