None Type Object

Description

I want to do segmentation using a bisenet model and I want to do it using tensorrt. When I searched for how I can do it, I wrote a code using certain sources. . but I get an error and I don’t know why.

Environment

TensorRT Version: 10.4
GPU Type: RTX A2000
Nvidia Driver Version: 535
CUDA Version: 12.2
CUDNN Version: 8.6.7
Operating System + Version: 22:04
Python Version (if applicable): 3.10.12
TensorFlow Version (if applicable):
PyTorch Version (if applicable): 2.4.1
Baremetal or Container (if container which image + tag):

Relevant Files

This model : Bisenet

Steps To Reproduce

This is My Code

import os
import os.path as osp
import cv2
import numpy as np
import argparse

import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit

# Argümanların işlenmesi
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="command")
compile_parser = subparsers.add_parser('compile')
compile_parser.add_argument('--onnx')
compile_parser.add_argument('--quant', default='fp32')
compile_parser.add_argument('--savepth', default='./model.trt')
run_parser = subparsers.add_parser('run')
run_parser.add_argument('--mdpth')
run_parser.add_argument('--impth')
run_parser.add_argument('--outpth', default='./res.png')
args = parser.parse_args()

np.random.seed(123)
in_datatype = trt.nptype(trt.float32)
out_datatype = trt.nptype(trt.int32)
palette = np.random.randint(0, 256, (256, 3)).astype(np.uint8)

ctx = pycuda.autoinit.context
trt.init_libnvinfer_plugins(None, "")
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Görüntü işlemi
def get_image(impth, size):
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)[:, None, None]
    var = np.array([0.229, 0.224, 0.225], dtype=np.float32)[:, None, None]
    iH, iW = size[0], size[1]
    img = cv2.imread(impth)[:, :, ::-1]
    orgH, orgW, _ = img.shape
    img = cv2.resize(img, (iW, iH)).astype(np.float32)
    img = img.transpose(2, 0, 1) / 255.
    img = (img - mean) / var
    return img, (orgH, orgW)

# Bellek tahsisi
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    for i in range(engine.num_io_tensors):
        tensor_name = engine.get_tensor_name(i)
        size = trt.volume(engine.get_tensor_shape(tensor_name))
        dtype = trt.nptype(engine.get_tensor_dtype(tensor_name))

        # Try to reduce the pagelocked memory usage by checking the required size
        try:
            host_mem = cuda.pagelocked_empty(size, dtype)  # Allocate host memory
            device_mem = cuda.mem_alloc(host_mem.nbytes)   # Allocate device memory
        except cuda.Error as e:
            print(f"CUDA memory allocation failed: {e}")
            return None  # Handle memory allocation failure

        bindings.append(int(device_mem))

        if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT:
            inputs.append((host_mem, device_mem))
        else:
            outputs.append((host_mem, device_mem))

    return inputs, outputs, bindings, stream

# ONNX modelinden motor oluşturma
def build_engine_from_onnx(onnx_file_path):
    EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        with open(onnx_file_path, 'rb') as model:
            if not parser.parse(model.read()):
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None

        builder.max_batch_size = 1  # Reduce batch size
        config = builder.create_builder_config()
        config.max_workspace_size = 1 << 28  # 256 MB

        if args.quant == 'fp16':
            config.set_flag(trt.BuilderFlag.FP16)

        return builder.build_serialized_network(network, config)

# Motoru serileştirme
def serialize_engine_to_file(engine, savepth):
    with open(savepth, "wb") as f:
        f.write(engine)

# Motoru deserialleştirme
def deserialize_engine_from_file(savepth):
    with open(savepth, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

# Ana işlev
def main():
    if args.command == 'compile':
        engine = build_engine_from_onnx(args.onnx)
        if engine:
            serialize_engine_to_file(engine, args.savepth)
    elif args.command == 'run':
        engine = deserialize_engine_from_file(args.mdpth)
        if engine is None:
            print("Error loading engine")
            return
        
        inputs, outputs, bindings, stream = allocate_buffers(engine)

        if inputs is None or outputs is None or bindings is None or stream is None:
            print("Memory allocation failed, exiting...")
            return    
        
        context = engine.create_execution_context()

        tensor_name = engine.get_tensor_name(0)
        ishape = engine.get_tensor_shape(tensor_name)
        img, (orgH, orgW) = get_image(args.impth, ishape[1:])

        h_input = np.ascontiguousarray(img)
        cuda.memcpy_htod_async(inputs[0][1], h_input, stream)

        for i in range(engine.num_io_tensors):
            context.set_tensor_address(engine.get_tensor_name(i), bindings[i])

        context.execute_async_v3(stream_handle=stream.handle)

        for h_output, d_output in outputs:
            cuda.memcpy_dtoh_async(h_output, d_output, stream)
        stream.synchronize()

        oshape = engine.get_tensor_shape(1)
        pred = np.argmax(outputs[0][0].reshape(oshape), axis=1)
        out = palette[pred]
        out = out.reshape(*oshape[1:], 3)
        out = cv2.resize(out, (orgW, orgH))
        cv2.imwrite(args.outpth, out)

        del inputs, outputs, bindings
        stream.synchronize()
        for h_mem, d_mem in outputs:
            del h_mem
            cuda.free(d_mem)


if __name__ == '__main__':
    main()

Hi @ahmet.gumustas ,
Request you to raise this issue here

Thanks