Hardware platform: Jetson AGX Xavier
Jetpack version: JetPack-L4T-4.1.1-linux-x64_b57
GPU type: 512-core Volta GPU with Tensor Cores
linux version: Linux jetson-0423718016883 4.9.108-tegra
CUDA version: CUDA Version 10.0.117
CUDNN version: cudnn7.3.1.20
TensorRT version: 5.0.3.2-1
Problem description:
When I try to build an INT8 inference engine for a Faster R-CNN Caffe model with C++, I get an error like this:
Begin transform caffemodel to trtmodel
Begin parsing model...INT8
End parsing model...
Begin building engine...INT8
Read Calibration Cache from model_A/predict/int8/CalibrationTable
ERROR: ../builder/cudnnBuilderUtils.cpp (255) - Cuda Error in findFastestTactic: 4
ERROR: runtime.cpp (30) - Cuda Error in free: 4
terminate called after throwing an instance of 'nvinfer1::CudaError'
what(): std::exception
Aborted
This is my code:
// Build an INT8 TensorRT engine from a Caffe model and serialize it into
// *trtModelStream. Inputs (declared elsewhere): gLogger, pluginFactory,
// deployFile, modelFile, outputs, maxBatchSize, imageSetFile, imagePath,
// cachePath, trtModelStream, and the INPUT_C/H/W & BATCH_SIZE constants.
//
// NOTE: Caffe weights are FP32. INT8 quantization is applied by the BUILDER
// (setInt8Mode + calibrator), never by the parser — parsing with
// DataType::kINT8 is invalid and causes builder-time CUDA errors
// (TensorRT's sampleINT8 maps kINT8 -> kFLOAT before calling parse()).

// create the builder
IBuilder* builder = createInferBuilder(gLogger);

// parse the caffe model to populate the network, then set the outputs
INetworkDefinition* network = builder->createNetwork();
ICaffeParser* parser = createCaffeParser();
parser->setPluginFactoryV2(pluginFactory);
std::cout << "Begin parsing model...INT8" << std::endl;
// FIX: weights are parsed as kFLOAT; INT8 mode is enabled on the builder below.
const IBlobNameToTensor* blobNameToTensor = parser->parse(deployFile.c_str(),
                                                          modelFile.c_str(),
                                                          *network,
                                                          DataType::kFLOAT);
assert(blobNameToTensor && "Caffe model parsing failed");
std::cout << "End parsing model..." << std::endl;

// specify which tensors are outputs (guard against typos in `outputs`)
for (auto& s : outputs)
{
    ITensor* tensor = blobNameToTensor->find(s.c_str());
    assert(tensor && "requested output blob not found in network");
    network->markOutput(*tensor);
}

// Build the engine
builder->setMaxBatchSize(maxBatchSize);
builder->setMaxWorkspaceSize(1 << 30); // 1 GiB scratch; the plugin layer needs ~6MB at batch size 5
builder->setAverageFindIterations(1);
builder->setMinFindIterations(1);
builder->setDebugSync(true);
builder->setInt8Mode(true); // INT8 quantization happens here, driven by the calibrator

// Input dimensions for the calibration data loader.
//DimsCHW mDims = static_cast<DimsCHW&&>(blobNameToTensor->find(INPUT_BLOB_NAME0)->getDimensions());
DimsCHW mDims(INPUT_C, INPUT_H, INPUT_W);
DataLoader dataLoader(imageSetFile, imagePath,
                      BATCH_SIZE, mDims.w(), mDims.h(),
                      mDims.c());
// The calibrator must outlive buildCudaEngine(); it does here because it is
// a stack object in the same scope as the build call.
Int8EntropyCalibrator calibrator(&dataLoader,
                                 mDims.c(), mDims.h(), mDims.w(),
                                 true, cachePath);
builder->setInt8Calibrator(&calibrator);
builder->setDefaultDeviceType(DeviceType::kGPU);

std::cout << "Begin building engine...INT8" << std::endl;
ICudaEngine* engine = builder->buildCudaEngine(*network);
assert(engine && "engine build failed");
std::cout << "End building engine..." << std::endl;

// we don't need the network any more, and we can destroy the parser
network->destroy();
parser->destroy();

// serialize the engine, then close everything down
(*trtModelStream) = engine->serialize();
engine->destroy();
builder->destroy();
shutdownProtobufLibrary();
I'm sure the calibration cache file exists; its contents are as follows:
conv2_1: 41ee1723
conv1_2: 41946c10
conv5_2: 3f02d95c
pool1: 41b3272e
conv5_3: 3e8adcd5
rpn_cls_score: 3dbf0d0b
fc7: 3cfbbd51
fc6: 3dcee4c1
cls_score: 3e2f3ce3
conv5_1: 3fb761ec
rois: 4030ecd2
rpn_cls_prob_reshape: 3c010a14
rpn_bbox_pred: 3c7135fc
rpn_cls_score_reshape: 3dbf0d0b
bbox_pred: 3bc5363a
rpn_cls_prob: 3c010a14
conv4_2: 40f5dd99
pool4: 40846b6a
conv4_1: 418d405a
conv3_1: 42454800
pool3: 4211f483
conv2_2: 424e076c
cls_prob: 3c02f5eb
rpn/output: 3d84a663
conv4_3: 4051dcfd
conv3_2: 421d7f72
conv3_3: 4206fece
im_info: 40316ddc
pool2: 42709c6b
conv1_1: 4093748e
pool5: 3ea73e05
data: 3f99411a
And I have converted the same caffemodel to an FP16 trtmodel successfully, so I don't know what the problem is.