Direct GPU Inference

// Query the binding indices for the two tensor names.
int inputIndex = mEngine->getBindingIndex("Input3");
int outputIndex = mEngine->getBindingIndex("Plus214_Output_0");
// NOTE(review): the indices queried above are not used below; the buffers are
// placed at fixed positions 0 and 1, and each entry holds the address of the
// variable (&gpuTensorInput, &tensorOutput) rather than the variable's value.
void* buf[2];
buf[0] = &gpuTensorInput;
buf[1] = &tensorOutput;
// Pass the binding array to executeV2; status receives the call's result.
bool status = context->executeV2(buf);

This looks reasonable, but I’d make the following changes (compile in debug mode so that the assertions are actually checked):

// Resolve the binding slot of each tensor from its name.
int inputIndex = mEngine->getBindingIndex("Input3");
int outputIndex = mEngine->getBindingIndex("Plus214_Output_0");

// Sanity-check that each index is a valid slot of the two-entry array below
// (getBindingIndex can yield -1 if the name you passed is not a binding).
assert(inputIndex >= 0 && inputIndex <= 1);
assert(outputIndex >= 0 && outputIndex <= 1);

// Sanity-check the direction of each binding: the first must be an input,
// the second must not be.
assert(mEngine->bindingIsInput(inputIndex));
assert(!mEngine->bindingIsInput(outputIndex));

void* buf[2];
// Place each buffer at the slot that was queried rather than hard-coding 0 and 1.
// Both pointers are assumed to already be void*, so they are stored as-is
// (their address is not taken).
buf[inputIndex] = gpuTensorInput;
buf[outputIndex] = tensorOutput;
bool status = context->executeV2(buf);