Internal Error (Parameter check failed at: runtime/api/executionContext.cpp::resolveSlots::1495, condition: allInputDimensionsSpecified(routine)


I’d like to run an engine with dynamic batch size. As of now, my goal is to run the model and test it. Performance is not important for now. I looked at the examples from the official repository Copied from TensorRT/samples/python at main · NVIDIA/TensorRT · GitHub but when I run my code the command context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) throws the following error:

[TensorRT] ERROR: 3: [executionContext.cpp::resolveSlots::1495] Error Code 3: Internal Error (Parameter check failed at: runtime/api/executionContext.cpp::resolveSlots::1495, condition: allInputDimensionsSpecified(routine)
[TensorRT] ERROR: 2: [executionContext.cpp::enqueueInternal::360] Error Code 2: Internal Error (Could not resolve slots: )


I am running my code within the official Deepstream 6.0.1 container which uses TensorRT ‘’.

Steps To Reproduce

Here’s my code:

import numpy as np
import requests
from PIL import Image
import tensorrt as trt
import torch
from torchvision import transforms
import torchvision.transforms.functional as F

import pycuda.driver as cuda
import pycuda.autoinit

class HostDeviceMem(object):
    """ Copied from """
    def __init__(self, host_mem, device_mem): = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str( + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()

class MyModel:

    def __init__(self, engine_path):
        self.engine_path = engine_path
        self.logger = trt.Logger(trt.Logger.WARNING)
        self.runtime = trt.Runtime(self.logger)
        self.engine = self.load_engine(self.runtime, self.engine_path)
        self.inputs, self.outputs, self.bindings, = self.allocate_buffers(self.engine)
        self.context = self.engine.create_execution_context()

        # PyTorch preprocessing
        IMAGE_SIZE = 224
        NORMALIZE_MEAN = torch.tensor([0.485, 0.456, 0.406])
        NORMALIZE_STD = torch.tensor([0.226, 0.226, 0.266])
        self.preprocessing_transforms = transforms.Compose([
            transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),   # todo: is it between -1 and 1?
            transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        self.input_dtype = np.float32

    def download_image(image_url: str) -> Image.Image:
        return, stream=True).raw)

    def load_engine(trt_runtime, engine_path):
        """ Copied from """
        trt.init_libnvinfer_plugins(None, "")
        with open(engine_path, 'rb') as f:
            engine_data =
        engine = trt_runtime.deserialize_cuda_engine(engine_data)
        return engine

    def allocate_buffers(engine):
        """ Copied from """
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()
        for binding in engine:
            size = abs(trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size)
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings, stream

    def do_inference_v2(context, bindings, inputs, outputs, stream):
        """ Copied from """
        # Transfer input data to the GPU.
        [cuda.memcpy_htod_async(inp.device,, stream) for inp in inputs]
        # Run inference.
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        [cuda.memcpy_dtoh_async(, out.device, stream) for out in outputs]
        # Synchronize the stream
        # Return only the host outputs.
        return [ for out in outputs]

    def infer(self, image: Image.Image):
        image = self._preprocessing(image)
        batch = np.expand_dims(image, 0)
        output = self._trt_infer(x=batch, batch_size=1)
        return output

    def _preprocessing(self, image: Image.Image):
            image = self.preprocessing_transforms(image)
            image = np.array(image)
            return image

    def _trt_infer(self, x: np.array, batch_size: int) -> np.array:
        x = x.astype(self.input_dtype)
        np.copyto(self.inputs[0].host, x.ravel())
        return self.do_inference_v2(self.context, self.bindings, self.inputs, self.outputs,

if __name__ == "__main__":

    model = MyModel(engine_path="model.engine")
    image_urls = [
    for image_url in image_urls:
        image = model.download_image(image_url)
        output = model.infer(image)

I can send the serialized engine in private if that helps with the issue.


This looks like a Deepstream related issue. We will move this post to the Deepstream forum.


Hi @NVES , the question is not related to DeepStream. For my convenience I am running the code inside the Deepstream container, but I am working with Python and TensorRT. Deepstream is not involved in the question. I am trying to use Python to run a TensorRT engine.
Thank you