import tensorrt as trt
import trt_common as common
import numpy as np
# Module-level TensorRT logger, constructed with the WARNING severity level;
# it is handed to trt.Builder in layer_define().
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
def layer_define():
    """Build a TensorRT engine that contains a single 3x3 convolution.

    The network has one float32 input named 'input' with shape
    (1, 64, 56, 56). It is fed through a convolution with 64 output maps and
    a (3, 3) kernel whose weights are all ones and whose bias is an empty
    ``trt.Weights()``. The convolution result is renamed 'output' and marked
    as the network output.

    Returns:
        The engine returned by ``builder.build_cuda_engine(network)``.

    Raises:
        RuntimeError: If ``build_cuda_engine`` returns ``None`` rather than
            an engine, so callers get a clear error instead of failing later
            when they try to use ``None`` as a context manager.
    """
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
        # Builder configuration: workspace limit and FP16 mode switched on.
        builder.max_workspace_size = common.GiB(1)
        builder.fp16_mode = True
        # Further builder options, intentionally left disabled:
        # builder.strict_type_constraints = True
        # builder.int8_mode = True
        # builder.int8_calibrator = calib

        input_tensor = network.add_input(
            name='input', dtype=trt.float32, shape=(1, 64, 56, 56)
        )

        # All-ones kernel of shape (64, 64, 3, 3); the empty trt.Weights()
        # is passed in the bias position.
        conv1_weight = np.ones((64, 64, 3, 3), dtype=np.float32)
        conv1 = network.add_convolution(
            input_tensor, 64, (3, 3), conv1_weight, trt.Weights()
        )

        conv1.get_output(0).name = 'output'
        network.mark_output(conv1.get_output(0))

        engine = builder.build_cuda_engine(network)
        if engine is None:
            raise RuntimeError(
                "TensorRT failed to build the CUDA engine "
                "(build_cuda_engine returned None)"
            )
        return engine
def run_engine():
    """Build the engine via layer_define() and run inference on all-ones data.

    Buffers are obtained from ``common.allocate_buffers``, the first input's
    host buffer is replaced with a float32 array of ones shaped
    (1, 64, 56, 56), and ``common.do_inference`` is called twice on the same
    execution context: once with ``run_iter=1`` and once with
    ``run_iter=1000``. Nothing is returned.
    """
    with layer_define() as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        inputs[0].host = np.ones((1, 64, 56, 56), dtype=np.float32)

        # Arguments that are identical for both inference calls.
        io_args = {
            'bindings': bindings,
            'inputs': inputs,
            'outputs': outputs,
            'stream': stream,
        }

        with engine.create_execution_context() as context:
            # NOTE(review): presumably a single warm-up pass followed by the
            # 1000 passes that are being profiled -- confirm with the author.
            for iterations in (1, 1000):
                # Single-element unpacking: exactly one output is expected.
                (_result,) = common.do_inference(
                    context, run_iter=iterations, **io_args
                )
def main():
    """Script entry point: delegate to run_engine()."""
    run_engine()


# Only run when executed as a script, not when imported.
if __name__ == "__main__":
    main()
When I profile the code above with nvprof, I see the following kernels:
9.98% 16.609ms 1001 16.592us 14.881us 17.889us void cuInt8::nchwTonhwc<float, int=32, int=32, int=2>(float const *, __half*, int, int, int, int, int, int, int, int)
7.52% 12.526ms 1001 12.513us 11.968us 13.601us void cuInt8::nhwcTonchw<float, int=32, int=32, int=2>(__half const *, float*, int, int, int, int, int, int)
So my question is: how can I define the input data format as NHWC, so that these layout-conversion kernels can be avoided?