Linux distro and version - Ubuntu 16.04
GPU type - Tesla K80
NVIDIA driver version - 390.46
CUDA version - 9.0
cuDNN version - 7.3
Python version [if using Python] - 3.5
TensorFlow version - 1.7.0
TensorRT version - 5
Describe the problem
Conversion to a TensorRT engine was successful, and I wrote it to an engine file:
import tensorrt as trt
import uff

# Convert the frozen TensorFlow graph to UFF; measure() is a local helper (not shown)
uff_model = measure(lambda: uff.from_tensorflow_frozen_model(frozen_graph_filename, output_names), 'uff.from_tensorflow')

builder = trt.Builder(G_LOGGER)
builder.max_batch_size = 1
builder.max_workspace_size = 1 << 30  # 1 GiB of builder workspace

# Parse the UFF model into a TensorRT network definition
network = builder.create_network()
parser2 = trt.UffParser()
parser2.register_input(input_names[0], (channel, height, width))
parser2.parse_buffer(uff_model, network)

# Build the engine and serialize it to disk
engine2 = builder.build_cuda_engine(network)
with open("new_engine_1.engine", "wb") as f:
    f.write(engine2.serialize())
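One note on the snippet above: parse_buffer returns a bool and build_cuda_engine returns None on failure, so writing the file is not by itself proof that the engine is valid. A minimal sketch of the same two calls with explicit failure checks:

# Sketch: same parse/build calls as above, but failing loudly
if not parser2.parse_buffer(uff_model, network):
    raise RuntimeError("UFF parse failed - check the logger output")
engine2 = builder.build_cuda_engine(network)
if engine2 is None:
    raise RuntimeError("Engine build failed - check the logger output")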
While loading the file for inference:
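The snippets below assume roughly this setup; pycuda.autoinit is one plausible way the CUDA context gets created (the actual initialization is not shown here):

import numpy as np
import pycuda.autoinit  # assumed: creates and activates a CUDA context on import
import pycuda.driver as cuda
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)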
def get_engine(engine_file):
    # Deserialize a previously built engine from disk
    with open(engine_file, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        print("Engine Loaded")
        return runtime.deserialize_cuda_engine(f.read())
def infer(engine, x, batch_size):
    n = engine.num_bindings
    print('%d bindings' % n)
    print(x.shape)

    # Pinned host buffers and device buffers for the two bindings
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(trt.float32))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(trt.float32))
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    stream = cuda.Stream()
    context = engine.create_execution_context()

    # Copy the input array into the pinned host buffer before the upload
    np.copyto(h_input, x.ravel())

    cuda.memcpy_htod_async(d_input, h_input, stream)
    context.execute_async(batch_size, bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    stream.synchronize()
    return h_output
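A sketch of the call site, with a random array standing in for the real preprocessed input:

engine = get_engine("new_engine_1.engine")
x = np.random.rand(3, 368, 368).astype(np.float32)  # placeholder input
out = infer(engine, x, 1)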
The output is:
2 bindings
(3, 368, 368)
[TensorRT] ERROR: cuda/cudaConvolutionLayer.cpp (163) - Cudnn Error in execute: 7
[TensorRT] ERROR: cuda/cudaConvolutionLayer.cpp (163) - Cudnn Error in execute: 7
The engine fails at execute() with this cuDNN error.