Description
I’m working on direct pytorch to tensorrt conversion of YOLOv4-csp from GitHub - WongKinYiu/ScaledYOLOv4: Scaled-YOLOv4: Scaling Cross Stage Partial Network
I receive the following segfault when trying to add int8 calibration to an already working engine build:
main_arena (Unknown Source:0)
libnvinfer.so.7![Unknown/Just-In-Time compiled code] (Unknown Source:0)
libnvinfer.so.7!nvinfer1::builder::buildEngine(nvinfer1::NetworkBuildConfig&, nvinfer1::NetworkQuantizationConfig const&, nvinfer1::builder::EngineBuildContext const&, nvinfer1::Network const&) (Unknown Source:0)
libnvinfer.so.7!nvinfer1::builder::Builder::buildInternal(nvinfer1::NetworkBuildConfig&, nvinfer1::NetworkQuantizationConfig const&, nvinfer1::builder::EngineBuildContext const&, nvinfer1::Network const&) (Unknown Source:0)
libnvinfer.so.7!nvinfer1::builder::Builder::buildEngineWithConfig(nvinfer1::INetworkDefinition&, nvinfer1::IBuilderConfig&) (Unknown Source:0)
tensorrt.so!void pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<nvinfer1::ICudaEngine*, nvinfer1::IBuilder, nvinfer1::INetworkDefinition&, nvinfer1::IBuilderConfig&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::arg, pybind11::arg, char const*>(nvinfer1::ICudaEngine* (nvinfer1::IBuilder::)(nvinfer1::INetworkDefinition&, nvinfer1::IBuilderConfig&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::arg const&, pybind11::arg const&, char const const&)::{lambda(nvinfer1::IBuilder*, nvinfer1::INetworkDefinition&, nvinfer1::IBuilderConfig&)#1}, nvinfer1::ICudaEngine*, nvinfer1::IBuilder*, nvinfer1::INetworkDefinition&, nvinfer1::IBuilderConfig&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::arg, pybind11::arg, char const*>(pybind11::cpp_function::initialize<nvinfer1::ICudaEngine*, nvinfer1::IBuilder, nvinfer1::INetworkDefinition&, nvinfer1::IBuilderConfig&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::arg, pybind11::arg, char const*>(nvinfer1::ICudaEngine* (nvinfer1::IBuilder::)(nvinfer1::INetworkDefinition&, nvinfer1::IBuilderConfig&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::arg const&, pybind11::arg const&, char const const&)::{lambda(nvinfer1::IBuilder*, nvinfer1::INetworkDefinition&, nvinfer1::IBuilderConfig&)#1}&&, nvinfer1::ICudaEngine* ()(nvinfer1::IBuilder, nvinfer1::INetworkDefinition&, nvinfer1::IBuilderConfig&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::arg const&, pybind11::arg const&, char const* const&)::{lambda(pybind11::detail::function_call&)#3}::_FUN(pybind11::detail::function_call) [clone .lto_priv.2423] (Unknown Source:0)
tensorrt.so!pybind11::cpp_function::dispatcher(_object*, _object*, _object*) (Unknown Source:0)
_PyCFunction_FastCallDict(PyObject * func_obj, PyObject ** args, Py_ssize_t nargs, PyObject * kwargs) (/tmp/build/80754af9/python_1588903631989/work/Objects/methodobject.c:231)
_PyCFunction_FastCallKeywords(PyObject * func, PyObject ** stack, Py_ssize_t nargs, PyObject * kwnames) (/tmp/build/80754af9/python_1588903631989/work/Objects/methodobject.c:294)
call_function(PyObject *** pp_stack, Py_ssize_t oparg, PyObject * kwnames) (/tmp/build/80754af9/python_1588903631989/work/Python/ceval.c:4875)
_PyEval_EvalFrameDefault(PyFrameObject * f, int throwflag) (/tmp/build/80754af9/python_1588903631989/work/Python/ceval.c:3351)
_PyEval_EvalCodeWithName(PyObject * qualname, PyObject * name, PyObject * closure, PyObject * kwdefs, Py_ssize_t defcount, PyObject ** defs, int kwstep, Py_ssize_t kwcount, PyObject ** kwargs, PyObject ** kwnames, Py_ssize_t argcount, PyObject ** args, PyObject * locals, PyObject * globals, PyObject * _co) (/tmp/build/80754af9/python_1588903631989/work/Python/ceval.c:4166)
PyEval_EvalCodeEx(PyObject * _co, PyObject * globals, PyObject * locals, PyObject ** args, int argcount, PyObject ** kws, int kwcount, PyObject ** defs, int defcount, PyObject * kwdefs, PyObject * closure) (/tmp/build/80754af9/python_1588903631989/work/Python/ceval.c:4187)
PyEval_EvalCode(PyObject * co, PyObject * globals, PyObject * locals) (/tmp/build/80754af9/python_1588903631989/work/Python/ceval.c:731)
run_mod(mod_ty mod, PyObject * filename, PyObject * globals, PyObject * locals, PyCompilerFlags * flags, PyArena * arena) (/tmp/build/80754af9/python_1588903631989/work/Python/pythonrun.c:1025)
PyRun_FileExFlags(FILE * fp, const char * filename_str, int start, PyObject * globals, PyObject * locals, int closeit, PyCompilerFlags * flags) (/tmp/build/80754af9/python_1588903631989/work/Python/pythonrun.c:978)
PyRun_SimpleFileExFlags(FILE * fp, const char * filename, int closeit, PyCompilerFlags * flags) (/tmp/build/80754af9/python_1588903631989/work/Python/pythonrun.c:419)
run_file(PyCompilerFlags * p_cf, const wchar_t * filename, FILE * fp) (/tmp/build/80754af9/python_1588903631989/work/Modules/main.c:340)
Py_Main(int argc, wchar_t ** argv) (/tmp/build/80754af9/python_1588903631989/work/Modules/main.c:811)
main(int argc, char ** argv) (/tmp/build/80754af9/python_1588903631989/work/Programs/python.c:69)
Environment
TensorRT Version: 7.1.3.4
GPU Type: rtx 2070
Nvidia Driver Version: 460.91.03
CUDA Version: 11.2
CUDNN Version: 8.0.2.39-1+cuda11.0
Operating System + Version: ubuntu20.04
Python Version (if applicable): 3.6
PyTorch Version (if applicable): 1.7.0a0+8deb4fe
Baremetal or Container (if container which image + tag): nvcr.io/nvidia/pytorch:20.08-py3
Steps To Reproduce
I have a working conversion process that uses the following code:
config = builder.create_builder_config()
#config.int8_calibrator = Calibrator('/data/images/')
config.max_workspace_size = int(1e6)
engine = builder.build_engine(network, config=config)
I've also tried the following as suggested by https://github.com/NVIDIA/TensorRT/issues/927
config = builder.create_builder_config()
config.int8_calibrator = Calibrator('/data/images/')
config.set_flag(trt.BuilderFlag.INT8)
config.max_workspace_size = int(1e6)
engine = builder.build_engine(network, config)
However if I uncomment the int8_calibrator line, I get the previously mentioned segmentation fault.
My int8 calibrator is as follows:
class Calibrator(trt.IInt8Calibrator):
def __init__(self, image_path):
self.imgs = glob.glob(image_path + '/*.jpg')
print('Found {} images'.format(len(self.imgs)))
self.cache_file = 'weights/calibration.cache'
self.idx = 0
self.dev_ptr = None
def get_algorithm(self):
return trt.CalibrationAlgoType.MINMAX_CALIBRATION
def write_calibration_cache(self, cache):
with open(self.cache_file, "wb") as f:
f.write(cache)
def read_calibration_cache(self):
# If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None.
if os.path.exists(self.cache_file):
print('Reading calibration cache')
with open(self.cache_file, "rb") as f:
return f.read()
print('No calibration cache found')
return None
def get_batch_size(self):
return 1
def get_batch(self, tensor_names):
if self.idx >= len(self.imgs):
print('Unable to load any images')
return []
img = self.imgs[self.idx]
print(f'Loading image {img}')
img = cv2.imread(img, cv2.IMREAD_UNCHANGED)
img = cv2.resize(img, (INPUT_SHAPE[2], INPUT_SHAPE[1]))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# now upload the image to the device
img = torch.from_numpy(img).cuda().permute(2,0,1).contiguous()
self.dev_ptr = img
self.idx += 1
return [int(self.dev_ptr.data_ptr())]
The output log is as follows:
[TensorRT] VERBOSE: Formats and tactics selection completed in 22.1625 seconds.
[TensorRT] VERBOSE: After reformat layers: 388 layers
[TensorRT] VERBOSE: Block size 70778880
[TensorRT] VERBOSE: Block size 70778880
[TensorRT] VERBOSE: Block size 17694720
[TensorRT] VERBOSE: Block size 17694720
[TensorRT] VERBOSE: Block size 8847360
[TensorRT] VERBOSE: Block size 2211840
[TensorRT] VERBOSE: Block size 1105920
[TensorRT] VERBOSE: Block size 1105920
[TensorRT] VERBOSE: Block size 100352
[TensorRT] VERBOSE: Total Activation Memory: 190318592
[TensorRT] INFO: Detected 1 inputs and 3 output network tensors.
Segmentation fault (core dumped)
What am I doing wrong? I’ve also tried setting the calibrator through Builder.int8_calibrator = Calibrator(…)